Introduction

We look for genes associated with fibroblast activation in several single-cell datasets.

Preliminaries

>

Setup

Packages

suppressPackageStartupMessages({
  # install.packages("BiocManager")

  # install.packages("kableExtra")
  requireNamespace("kableExtra")

  # ?
  # devtools::install_github("rstudio/crosstalk")
  # library("crosstalk")
  # library("htmlwidgets")

  # install.packages("crunch")
  # requireNamespace("crunch")

  # https://rstudio.github.io/DT/
  # install.packages("DT")
  requireNamespace("DT")

  # install.packages("RCurl")
  requireNamespace("RCurl")

  # install.packages("dplyr")
  library(dplyr)

  # install.packages("ggplot2")
  library(ggplot2)

  # install.packages("ggsci")
  requireNamespace("ggsci")
  # install.packages("ggtext")
  requireNamespace("ggtext")
  # install.packages("ggrepel")
  requireNamespace("ggrepel")

  # install.packages("assertthat")
  requireNamespace("assertthat")

  #
  requireNamespace("magrittr")
  `%<>%` <- magrittr::`%<>%`
  `%>%` <- magrittr::`%>%`

  # install.packages("glue")
  requireNamespace("glue")
  glue <- glue::glue

  # install.packages("tidyr")
  requireNamespace("tidyr")

  # BiocManager::install("org.Mm.eg.db")
  # BiocManager::install("org.Hs.eg.db")
  requireNamespace("org.Mm.eg.db")
  requireNamespace("org.Hs.eg.db")

  # https://www.bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
  # BiocManager::install("goseq")
  # requireNamespace("goseq")

  # install.packages("pracma")
  requireNamespace("pracma")

  # https://vroom.r-lib.org/articles/vroom.html
  # install.packages("vroom")
  requireNamespace("vroom")

  # BiocManager::install("DESeq2")
  requireNamespace("DESeq2")

  # BiocManager::install("scater")
  requireNamespace("scater")

  # BiocManager::install("scran")
  # requireNamespace("scran")

  # BiocManager::install("igraph")
  requireNamespace("igraph")

  # install.packages("pheatmap")
  requireNamespace("pheatmap")

  # install.packages("gridExtra")
  requireNamespace("gridExtra")

  # install.packages("pathlibr")
  requireNamespace("pathlibr")
  # install.packages("stringr")
  requireNamespace("stringr")
  # install.packages("reshape2")
  requireNamespace("reshape2")

  # install.packages("pbapply")
  # requireNamespace("pbapply")

  # install.packages("Rtsne")
  requireNamespace("Rtsne")
  # install.packages("uwot")
  requireNamespace("uwot")

  # avoid importing `exprs` that leads to clashes
  requireNamespace("rlang")

  # install.packages("Seurat")
  # requireNamespace("Seurat")

  # BiocManager::install("TrajectoryUtils") # Didn't work
  # devtools::install_github("LTLA/TrajectoryUtils@e943606")
  # BiocManager::install("kstreet13/slingshot@25fc566")
  # requireNamespace("slingshot")

  # BiocManager::install("tradeSeq")
  # requireNamespace("tradeSeq")

  # install.packages("statmod")

  # BiocManager::install("edgeR")
  requireNamespace("edgeR")

  # https://github.com/eliocamp/ggnewscale
  # install.packages("ggnewscale")
  # requireNamespace("ggnewscale")

  # https://www.rdocumentation.org/packages/DGCA/versions/1.0.2
  # BiocManager::install("impute")
  # BiocManager::install("preprocessCore")
  # BiocManager::install("GO.db")
  # install.packages("WGCNA")
  # install.packages("DGCA")
  requireNamespace("DGCA")


  # https://github.com/danielschw188/Revelio
  # devtools::install_github('danielschw188/Revelio@e8e1492')
  requireNamespace("Revelio")

  # # install.packages("rgl")
  # requireNamespace("rgl")

  # install.packages("plotly")
  requireNamespace("plotly")

  # https://www.rdocumentation.org/packages/Hmisc/versions/4.5-0/topics/rcorr
  # install.packages("Hmisc")
  requireNamespace("Hmisc")

  # BiocManager::install("multiGSEA")
  # requireNamespace("multiGSEA")

  # install.packages("tidygraph")
  # install.packages("ggraph")
  requireNamespace("ggraph")
  requireNamespace("tidygraph")

  # https://htmlpreview.github.io/?https://github.com/aertslab/SCENIC/blob/master/inst/doc/SCENIC_Setup.html
  # BiocManager::install(c("AUCell", "RcisTarget"))
  # BiocManager::install(c("GRNBoost"))
  # BiocManager::install(c("doMC", "doRNG"))
  # devtools::install_github("aertslab/SCENIC@0585e87")
  # requireNamespace("SCENIC")
})
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/glue/libs/glue.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/xml2/libs/xml2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/colorspace/libs/colorspace.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/systemfonts/libs/systemfonts.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/svglite/libs/svglite.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bitops/libs/bitops.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RCurl/libs/RCurl.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vctrs/libs/vctrs.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ellipsis/libs/ellipsis.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fansi/libs/fansi.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/utf8/libs/utf8.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tibble/libs/tibble.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/purrr/libs/purrr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/dplyr/libs/dplyr.so") ...
## now dyn.load("/usr/lib/R/library/grid/libs/grid.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rcpp/libs/Rcpp.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/gridtext/libs/gridtext.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ggrepel/libs/ggrepel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidyr/libs/tidyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit/libs/bit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit64/libs/bit64.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastmap/libs/fastmap.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/cachem/libs/cachem.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSQLite/libs/RSQLite.so") ...
## now dyn.load("/usr/lib/R/library/parallel/libs/parallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biobase/libs/Biobase.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/S4Vectors/libs/S4Vectors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/IRanges/libs/IRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/png/libs/png.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XVector/libs/XVector.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biostrings/libs/Biostrings.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vroom/libs/vroom.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocParallel/libs/BiocParallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/GenomicRanges/libs/GenomicRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/matrixStats/libs/matrixStats.so") ...
## now dyn.load("/usr/lib/R/library/lattice/libs/lattice.so") ...
## now dyn.load("/usr/lib/R/library/Matrix/libs/Matrix.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DelayedArray/libs/DelayedArray.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XML/libs/XML.so") ...
## now dyn.load("/usr/lib/R/library/splines/libs/splines.so") ...
## now dyn.load("/usr/lib/R/library/survival/libs/survival.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/genefilter/libs/genefilter.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/locfit/libs/locfit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DESeq2/libs/DESeq2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/sparseMatrixStats/libs/sparseMatrixStats.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/beachmat/libs/beachmat.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/scuttle/libs/scuttle.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocNeighbors/libs/BiocNeighbors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/irlba/libs/irlba.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocSingular/libs/BiocSingular.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/igraph/libs/igraph.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/plyr/libs/plyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/reshape2/libs/reshape2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rtsne/libs/Rtsne.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/uwot/libs/uwot.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/limma/libs/limma.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/edgeR/libs/edgeR.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastcluster/libs/fastcluster.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/base64enc/libs/base64enc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/jpeg/libs/jpeg.so") ...
## now dyn.load("/usr/lib/R/library/cluster/libs/cluster.so") ...
## now dyn.load("/usr/lib/R/library/foreign/libs/foreign.so") ...
## now dyn.load("/usr/lib/R/library/nnet/libs/nnet.so") ...
## now dyn.load("/usr/lib/R/library/rpart/libs/rpart.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/checkmate/libs/checkmate.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/backports/libs/backports.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/data.table/libs/datatable.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Hmisc/libs/Hmisc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/impute/libs/impute.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/preprocessCore/libs/preprocessCore.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/WGCNA/libs/WGCNA.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/lazyeval/libs/lazyeval.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidygraph/libs/tidygraph.so") ...

Session

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8    LC_NUMERIC=C            LC_TIME=lzh_TW         
##  [4] LC_COLLATE=en_US.UTF-8  LC_MONETARY=lzh_TW      LC_MESSAGES=en_US.UTF-8
##  [7] LC_PAPER=lzh_TW         LC_NAME=C               LC_ADDRESS=C           
## [10] LC_TELEPHONE=C          LC_MEASUREMENT=lzh_TW   LC_IDENTIFICATION=C    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_3.3.5 dplyr_1.0.7  
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.2.1             Hmisc_4.5-0                
##   [3] systemfonts_1.0.2           plyr_1.8.6                 
##   [5] igraph_1.2.6                lazyeval_0.2.2             
##   [7] splines_4.1.0               Revelio_0.1.0              
##   [9] BiocParallel_1.26.0         GenomeInfoDb_1.28.0        
##  [11] scater_1.20.0               digest_0.6.27              
##  [13] foreach_1.5.1               htmltools_0.5.1.1          
##  [15] viridis_0.6.1               GO.db_3.13.0               
##  [17] fansi_0.5.0                 checkmate_2.0.0            
##  [19] magrittr_2.0.1              memoise_2.0.0              
##  [21] ScaledMatrix_1.0.0          cluster_2.1.2              
##  [23] doParallel_1.0.16           limma_3.48.0               
##  [25] fastcluster_1.2.3           Biostrings_2.60.1          
##  [27] annotate_1.70.0             matrixStats_0.59.0         
##  [29] vroom_1.4.0                 svglite_2.0.0              
##  [31] jpeg_0.1-8.1                colorspace_2.0-2           
##  [33] blob_1.2.1                  rvest_1.0.0                
##  [35] ggrepel_0.9.1               xfun_0.23                  
##  [37] crayon_1.4.1                RCurl_1.98-1.3             
##  [39] jsonlite_1.7.2              org.Mm.eg.db_3.13.0        
##  [41] genefilter_1.74.0           impute_1.66.0              
##  [43] survival_3.2-11             iterators_1.0.13           
##  [45] glue_1.4.2                  kableExtra_1.3.4           
##  [47] gtable_0.3.0                zlibbioc_1.38.0            
##  [49] XVector_0.32.0              webshot_0.5.2              
##  [51] DelayedArray_0.18.0         BiocSingular_1.8.0         
##  [53] SingleCellExperiment_1.14.1 BiocGenerics_0.38.0        
##  [55] scales_1.1.1                pheatmap_1.0.12            
##  [57] DBI_1.1.1                   edgeR_3.34.0               
##  [59] Rcpp_1.0.6                  htmlTable_2.2.1            
##  [61] viridisLite_0.4.0           xtable_1.8-4               
##  [63] gridtext_0.1.4              foreign_0.8-81             
##  [65] bit_4.0.4                   rsvd_1.0.5                 
##  [67] preprocessCore_1.54.0       Formula_1.2-4              
##  [69] stats4_4.1.0                DT_0.18                    
##  [71] htmlwidgets_1.5.3           httr_1.4.2                 
##  [73] RColorBrewer_1.1-2          ellipsis_0.3.2             
##  [75] pkgconfig_2.0.3             XML_3.99-0.6               
##  [77] scuttle_1.2.0               nnet_7.3-16                
##  [79] sass_0.4.0                  uwot_0.1.10                
##  [81] locfit_1.5-9.4              utf8_1.2.1                 
##  [83] dynamicTreeCut_1.63-1       tidyselect_1.1.1           
##  [85] rlang_0.4.11                reshape2_1.4.4             
##  [87] AnnotationDbi_1.54.1        munsell_0.5.0              
##  [89] tools_4.1.0                 cachem_1.0.5               
##  [91] generics_0.1.0              RSQLite_2.2.7              
##  [93] evaluate_0.14               stringr_1.4.0              
##  [95] fastmap_1.1.0               yaml_2.2.1                 
##  [97] org.Hs.eg.db_3.13.0         knitr_1.33                 
##  [99] bit64_4.0.5                 tidygraph_1.2.0            
## [101] purrr_0.3.4                 KEGGREST_1.32.0            
## [103] sparseMatrixStats_1.4.0     pracma_2.3.3               
## [105] xml2_1.3.2                  compiler_4.1.0             
## [107] rstudioapi_0.13             plotly_4.9.3               
## [109] beeswarm_0.3.1              png_0.1-7                  
## [111] tibble_3.1.2                geneplotter_1.70.0         
## [113] bslib_0.2.5.1               stringi_1.6.2              
## [115] lattice_0.20-44             DGCA_1.0.2                 
## [117] Matrix_1.3-4                ggsci_2.9                  
## [119] vctrs_0.3.8                 pillar_1.6.1               
## [121] lifecycle_1.0.0             jquerylib_0.1.4            
## [123] BiocNeighbors_1.10.0        data.table_1.14.0          
## [125] bitops_1.0-7                irlba_2.3.3                
## [127] GenomicRanges_1.44.0        R6_2.5.0                   
## [129] latticeExtra_0.6-29         gridExtra_2.3              
## [131] vipor_0.4.5                 IRanges_2.26.0             
## [133] codetools_0.2-18            assertthat_0.2.1           
## [135] SummarizedExperiment_1.22.0 DESeq2_1.32.0              
## [137] withr_2.4.2                 S4Vectors_0.30.0           
## [139] GenomeInfoDbData_1.2.6      parallel_4.1.0             
## [141] ggtext_0.1.1                rpart_4.1-15               
## [143] grid_4.1.0                  pathlibr_0.1.0             
## [145] beachmat_2.8.0              tidyr_1.1.3                
## [147] rmarkdown_2.8               DelayedMatrixStats_1.14.0  
## [149] MatrixGenerics_1.4.0        Rtsne_0.15                 
## [151] Biobase_2.52.0              WGCNA_1.70-3               
## [153] base64enc_0.1-3             ggbeeswarm_0.6.0

Generic

RNG
set.seed(43)
Dual of the python star-star operator, sort of
dual <- function(f, g) {
  if (missing(g)) {
    stopifnot(class(f) == "function")
    (function(L) do.call(f, Filter(Negate(is.null), L)))
  } else {
    stopifnot(class(f) == "list")
    stopifnot(class(g) == "function")
    dual(g)(f)
  }
}

Example 1:

list(
  biggle = list(A = "hello", B = "world"),
  wiggle = list(A = "Hello", B = "World", C = NULL)
) %>%
  lapply(dual(function(B, A) paste(A, B)))
## $biggle
## [1] "hello world"
## 
## $wiggle
## [1] "Hello World"

Example 2:

list(
  A = data.frame(x = c("xa1", "xa2"), y = c("ya1", "ya2")),
  B = data.frame(x = c("xb1", "xb2"), y = c("yb1", "yb2")),
  C = data.frame(x = c("xc1", "xc2"), y = c("yc1", "yc2"))
) %>%
  # No need for the brackets!
  dual(cbind)
##   A.x A.y B.x B.y C.x C.y
## 1 xa1 ya1 xb1 yb1 xc1 yc1
## 2 xa2 ya2 xb2 yb2 xc2 yc2
Memory drain culprits
culprits <- function() {
  search()[[1]] %>%
    ls(name = .) %>%
    sapply(. %>% get() %>% object.size() %>% "/"(1e6)) %>%
    data.frame(size = .) %>%
    arrange(size) %>%
    mutate(cumsize = cumsum(size)) %>%
    arrange(desc(size)) %>%
    mutate(across(everything(), ~ signif(.x, digits = 2))) %>%
    mutate(unit = "Mb")
}

Example:

culprits() %>% head(3)
##           size cumsize unit
## dual     0.074   0.120   Mb
## glue     0.022   0.048   Mb
## culprits 0.011   0.026   Mb

Paths

for (i in 1:2) {
  BASEPATH <- pathlibr::Path$new(Sys.glob(paste(c("work", ".."), "*FibroAct", sep = "/")))
  setwd(BASEPATH$show)
}

print(paste("Work path:", BASEPATH))
## [1] "Work path: ../20210502-FibroAct"
stopifnot("main.Rmd" %in% names(BASEPATH$dir))
path_to <- (function(.) Sys.glob(BASEPATH$join("../..")$join(.)$show))
out_file <- (function(.) BASEPATH$join("output")$join(.)$show)
dir.create(out_file(""), showWarnings = FALSE)

Tables

Basic table printing.

kable <- function(.) {
  kableExtra::kbl(., align = "c") %>%
    kableExtra::kable_paper("hover", full_width = FALSE)
}

Options for fancy datatable printing.

options(DT.options = list(
  pageLength = 16,
  language = list(search = "Filter:"),
  scrollX = TRUE
))

Converting a symbol or GO ID to a link.

as.link <-
  function(x) {
    dplyr::case_when(
      # If x is like GO:003453
      startsWith(x, "GO:") ~ paste0(
        "<a target='_blank'",
        "href='", "http://amigo.geneontology.org/amigo/term/", x, "'",
        ">", x, "</a>"
      ),
      # Otherwise try this
      TRUE ~ paste0(
        "<a target='_blank'",
        "href='", "http://www.informatics.jax.org/marker/summary?nomen=", x, "'",
        ">", x, "</a>"
      )
    )
  }

Example

data.frame(x = c("GO:00345", "Lama")) %>%
  mutate(link = as.link(x)) %>%
  DT::datatable(width = "100%")

Printing a sequence of tables

kable_all <- function(tt) {
  for (t in tt) {
    t %>%
      table() %>%
      t() %>%
      kable() %>%
      print()
  }
}
# with results='asis'
list(
  a = c(c(1:5), c(2:5), c(3:5)),
  b = c(c(1:6), c(2:6), c(3:6)),
  b = c(c(1:7), c(2:7), c(3:7)),
  b = c(c(1:9), c(2:9), c(3:9))
) %>% kable_all()
1 2 3 4 5
1 2 3 3 3
1 2 3 4 5 6
1 2 3 3 3 3
1 2 3 4 5 6 7
1 2 3 3 3 3 3
1 2 3 4 5 6 7 8 9
1 2 3 3 3 3 3 3 3

Plotting

ggplot2::theme_set(theme_light(base_size = 15))
gglast <- function(what) {
  p <- ggplot2::last_plot()
  q <- p$mapping[[deparse(substitute(what))]]
  rlang::eval_tidy(q, data = p$data)
}

# example: scale_fill_manual(values = color_map, breaks = gglast(fill))
# Converts an annotation column (e.g. sce$group)
# to a named vector of colors (group -> color)
# List of palettes: grDevices::hcl.pals()
# http://colorspace.r-forge.r-project.org/reference/hcl_palettes.html
ramp <- function(anno, palette = "Temps") {
  anno %>%
    as.character() %>%
    unique() %>%
    c(NA) %>%
    data.frame(
      item = .,
      # https://developer.r-project.org/Blog/public/2019/04/01/hcl-based-color-palettes-in-grdevices/
      color = length(.) %>% colorRampPalette(grDevices::hcl.colors(n = ., palette = palette))(.)
    ) %>%
    tidyr::drop_na() %>%
    pull(color, item)
}

Example:

ramp(c("Grp1", "Grp2", "Grp1"), palette = "Dark 3") %>%
  t() %>%
  kable()
Grp1 Grp2
#E16A86 #50A315
RGB <- function(arr, alpha = 1) {
  stopifnot(ncol(arr) == 3)

  arr %>%
    as.data.frame() %>%
    `/`(255) %>%
    mutate(alpha = alpha) %>%
    mutate(colors = apply(., 1, \(x) rgb(x[1], x[2], x[3], alpha = x[["alpha"]]))) %>%
    pull(colors)
}

Example:

(1:7) %>%
  percent_rank() %>%
  (colorRamp(c("blue", "red"))) %>%
  RGB(alpha = 0.5) %>%
  t() %>%
  kable()
#0000FF80 #2B00D580 #5500AA80 #80008080 #AA005580 #D5002A80 #FF000080

Colors for consistency; may be overwritten below.

colors <- unlist(
  c(
    list(
      `374_VLMC` = "aquamarine3",
      `375_VLMC` = "chartreuse4",
      `376_VLMC` = "chartreuse3",
      `FB1` = "cornflowerblue",
      `FB2` = "blue",
      `Ctrl` = "chartreuse4",
      `EAE` = "coral2",
      `VLMC1` = "chartreuse1",
      `VLMC3` = "coral3",
      `VLMC4` = "coral4",
      `healthy` = "green",
      `EAE1` = "coral1",
      `EAE2` = "coral3",
      `healthy1` = "aquamarine",
      `healthy2` = "chartreuse3",
      `healthy3` = "chartreuse4",
      `positive` = "Red4",
      `negative` = "Steelblue1"
    ),
    ramp(c("G1.S", "S", "G2", "G2.M", "M.G1"), palette = "Dark 3")
  )
)
colors %>%
  unlist() %>%
  data.frame(c = .) %>%
  mutate(item = rownames(.)) %>%
  ggplot(aes(y = item, x = 1)) +
  geom_tile(fill = colors, stat = "identity") +
  scale_y_discrete(limits = rev(names(colors))) +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title = element_blank()
  )
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/farver/libs/farver.so") ...

Zoomable figures.

///*
$(document).ready(
  function() {
    $("img.zoomable").on("click", function(evt) {
      if (evt.originalEvent.shiftKey) {
        var el = $(this).clone();
        el.attr("width", "2048px");
        window.open().document.write(el.wrap('<p>').parent().html());
        return false;
      }
    });
  }
);
//*/

Shift+click on a figure to zoom:

list(
  randu %>% ggplot(aes(x = x, y = y)) +
    geom_point() +
    ggtitle("Zoomable 1"),
  randu %>% ggplot(aes(x = x, y = z)) +
    geom_point() +
    ggtitle("Zoomable 2")
)

Click-scrollable figures (like slides):

$(document).ready(
  function() {
    $("img.scrollable").css({
      'position': 'absolute', 'top': 0, 'left': 0, 'z-index': 100000,
    });
    
    $("img.scrollable").parent().css({'position': 'relative'});
    $("img.scrollable").parent().find('img:first').css({'position': 'relative'});
    
    $("img.scrollable").on("click", function(evt) {
      if (!evt.originalEvent.altKey && !evt.originalEvent.shiftKey) {
        $(this).css({'z-index': parseInt($(this).css('z-index'), 10) - 1});
        evt.stopPropagation();
        return false;
      };
      
      return true;
    });
  }
);

Click a figure to slide:

list(
  randu %>% ggplot(aes(x = x, y = y)) +
    geom_point() +
    ggtitle("Slide 1"),
  randu %>% ggplot(aes(x = x, y = z)) +
    geom_point() +
    ggtitle("Slide 2")
)

Dataframes

# Use first column as index
by_col1 <- (function(.) tibble::column_to_rownames(., colnames(.)[1]))
# Use index as new column `name`
ind2col <- (function(., name) tibble::rownames_to_column(., var = name))
take_rows <- function(df, rows = NULL) {
  if (!is.null(rows)) {
    return(df[rows[rows %in% rownames(df)], , drop = FALSE])
  } else {
    return(df)
  }
}
# For reading wide tables
Sys.setenv("VROOM_CONNECTION_SIZE" = 2 ** 18)

from_tsv <- function(filename, sep = "\t") {
  filename %>%
    vroom::vroom(del = sep, progress = FALSE) %>%
    suppressMessages() %>%
    by_col1()
}
# Writes dataframe to tab-separated file
# Precede by `ind2col("name for index")` to write the row names
to_tsv <- function(df, fn, sep = "\t") {
  vroom::vroom_write(df, out_file(fn), delim = sep)
}
Random sub-dataframe
# A small random sub-dataframe (intended for genes x samples)
shample <- function(df) {
  df[
    sample(rownames(df), min(nrow(.), 333)),
    sample(colnames(df), min(ncol(.), 33))
  ]
}

SCE

# Constructs a `SingleCellExperiment` checking consistency of sample names
make_sce <- function(counts, coldata) {
  # Check that samples in counts = genes x samples
  # are ordered as in coldata = samples x metafields
  stopifnot(assertthat::are_equal(colnames(counts), rownames(coldata)))

  SingleCellExperiment::SingleCellExperiment(
    assays = list(counts = counts), colData = coldata
  )
}
mock_sce <- function(genes, ncells) {
  sce <- scater::mockSCE(ngenes = length(genes), ncells = ncells)

  sce <- make_sce(
    sce@assays@data$counts %>% as.data.frame(row.names = genes),
    sce@colData %>% as.data.frame() %>% rename(celltype = Mutation_Status)
  )

  sce
}
drop_empty <- function(sce) {
  sce[, colSums(abs(sce@assays@data$counts)) != 0]
}
# Normalizes samplewise to sum(expr) = 1
# Consider using sce %>% scater::logNormCounts() %>% ...
norm1 <- function(sce) {
  sce@assays@data$counts %<>%
    t() %>%
    {
      (.) / (rowSums(.))
    } %>%
    t()

  sce
}
# Example
mock_sce(genes = LETTERS[1:10], ncells = 3) %>%
  take_rows(LETTERS[1:5]) %>%
  drop_empty() %>%
  norm1() %>%
  scater::logNormCounts() %>%
  scater::aggregateAcrossFeatures(ids = c("V", "C", "C", "C", "V"), average = TRUE)
## class: SingleCellExperiment 
## dim: 2 3 
## metadata(0):
## assays(1): counts
## rownames(2): C V
## rowData names(0):
## colnames(3): Cell_001 Cell_002 Cell_003
## colData names(4): celltype Cell_Cycle Treatment sizeFactor
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Gene meta

Taxons

taxid <- list(mm = 10090, hs = 9606)
mm hs
10090 9606

EntrezID

symbol2geneid <- function(genes) {
  data.frame(symbol = genes) %>%
    merge(
      y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
      all.x = TRUE,
      by = "symbol",
      sort = FALSE # !
    ) %>%
    pull(gene_id, symbol)
}
c("Jun", "Junb", "Junc") %>%
  data.frame(symbol = .) %>%
  mutate(gene_id = symbol2geneid(symbol)) %>%
  kable()
symbol gene_id
Jun 16476
Junb 16477
Junc NA

Omnipath

PPI
omnipath.in <-
  path_to("*/*/*/omnipath_in.csv.gz") %>%
  from_tsv() %>%
  select(
    source, source_genesymbol, target, target_genesymbol,
    organism,
    is_inhibition, is_stimulation
  )

Preview:

TF-target
omnipath.tf <-
  path_to("*/*/*/omnipath_tf.csv.gz") %>%
  from_tsv() %>%
  select(
    source, source_genesymbol, target, target_genesymbol,
    organism,
    is_inhibition, is_stimulation
  )

Preview:

Ligand-receptor
omnipath.lr <-
  path_to("*/*/*/omnipath_lr.csv.gz") %>%
  from_tsv()

Preview:

Annotating genes

We use Omnipath to annotate genes with their role using the following function.

# For a vector of gene symbols returns
# a vector containing the role of each symbol
symbol2role <- function(symbol, organism = taxid$mm) {
  f <- function(df) {
    df %>% filter(organism == organism)
  }

  data.frame(
    TF = if_else(symbol %in% f(omnipath.tf)$source_genesymbol, "TF", ""),
    Tgt = if_else(symbol %in% f(omnipath.tf)$target_genesymbol, "Tgt", ""),
    Lig = if_else(symbol %in% f(omnipath.lr)$source_genesymbol, "Lig", ""),
    Rec = if_else(symbol %in% f(omnipath.lr)$target_genesymbol, "Rec", "")
  ) %>%
    apply(MARGIN = 1, FUN = function(row) {
      if (any(nzchar(row))) {
        paste(row[nzchar(row)], collapse = "/")
      } else {
        "--"
      }
    })
}

For example:

data.frame(symbol = c("Jun", "Notch1", "Xyz")) %>%
  mutate(role = symbol2role(symbol)) %>%
  kable()
symbol role
Jun TF/Tgt
Notch1 Tgt/Lig/Rec
Xyz

Annotation

The following provides a gene name (description) and a longer explanation (summary).

gene.summary <-
  taxid %>%
  lapply(
    function(i) {
      path_to(paste0("*/NCBI/gene/*/", names(which(. == i)), ".tsv.gz")) %>%
        from_tsv() %>%
        ind2col("gene_id")
    }
  )

Preview:

The following function sets up a gene id/symbol homology table.

# homology(taxid$mm, taxid$hs) %>% head
# gives a dataframe like
#   gene_id.y gene_id.x  symbol.x symbol.y
# 1         1    117586      A1bg     A1BG
# 2         2    232345       A2m      A2M
# 3         9     17961      Nat2     NAT1
# 4        10     17960      Nat1     NAT2
# 5        12     16625 Serpina3c SERPINA3
# 6        13     67758     Aadac    AADAC
homology <- function(taxid.x, taxid.y) {
  # Assume that `gene_id` is unique
  symbols <- rbind(
    org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
    org.Hs.eg.db::org.Hs.egSYMBOL2EG %>% as.data.frame()
  )


  path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
    from_tsv() %>%
    select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
    filter(taxid %in% c(taxid.x, taxid.y)) %>%
    mutate(taxid = if_else(taxid == taxid.x, "x", "y")) %>%
    stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
    # "multiple rows match for taxid"
    suppressWarnings() %>%
    select(gene_id.x, gene_id.y) %>%
    merge(y = symbols %>% select(gene_id.x = gene_id, symbol.x = symbol), by = "gene_id.x") %>%
    merge(y = symbols %>% select(gene_id.y = gene_id, symbol.y = symbol), by = "gene_id.y")
}

For example:

homology(taxid$mm, taxid$hs) %>%
  head(11) %>%
  {
    .[, order(colnames(.))]
  } %>%
  mutate(seems.resonable = (tolower(symbol.x) == tolower(symbol.y))) %>%
  rbind("...") %>%
  kable()
gene_id.x gene_id.y symbol.x symbol.y seems.resonable
117586 1 A1bg A1BG TRUE
232345 2 A2m A2M TRUE
17961 9 Nat2 NAT1 FALSE
17960 10 Nat1 NAT2 FALSE
16625 12 Serpina3c SERPINA3 FALSE
67758 13 Aadac AADAC TRUE
227290 14 Aamp AAMP TRUE
11298 15 Aanat AANAT TRUE
234734 16 Aars AARS1 FALSE
268860 18 Abat ABAT TRUE
11303 19 Abca1 ABCA1 TRUE

We use that to add description/summary to mouse genes by homology.

gene.summary$mm %<>%
  merge(
    x = .,
    y = merge(
      x = gene.summary$hs %>%
        select(gene_id, summ = summary, desc = description) %>%
        mutate(desc = if_else(is.na(desc), desc, paste("By homology:", desc))) %>%
        mutate(summ = if_else(is.na(summ), summ, paste("By homology:", summ))),
      y = path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
        from_tsv() %>%
        filter(NCBI_Taxon_ID %in% taxid) %>%
        select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
        stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
        select(gene_id = gene_id.9606, alt_gene_id = gene_id.10090),
      by = "gene_id",
      all = FALSE
    ) %>%
      select(gene_id = alt_gene_id, alt_desc = desc, alt_summ = summ),
    by = "gene_id",
    all.x = TRUE,
    all.y = FALSE,
    sort = FALSE
  ) %>%
  mutate(summary = if_else(!is.na(summary), summary, alt_summ)) %>%
  mutate(description = if_else(!is.na(description), description, alt_desc)) %>%
  select(-alt_desc, -alt_summ)
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=10090: first taken
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=9606: first taken

Preview:

Gene sets

Genes of interest

Define gene sets
goi <-
  list(
    neuroinflammation = list(
      name = "Neuroinflammation",
      genes = unique(c(
        # "Early response" transcription factors: Fos, Fosb, Junb
        c("Fos", "Fosb", "Junb"),

        # Wikipathways Neuroinflammation:
        # https://www.wikipathways.org/instance/WP4919_r111814
        # Downloaded from
        # http://www.gsea-msigdb.org/gsea/msigdb/cards/WP_NEUROINFLAMMATION.html
        c("Ascc1", "Chuk", "Fos", "Jun", "Mapk14", "Mapk8", "mt-Co1", "mt-Co2", "Mtor", "Nfkbia", "Nos2", "Rela", "Tlr4")
      ))
    ),

    # From 2021-Manberg-...-Lewandowski, p.643
    pvf = list(
      name = "PVF",
      genes = c("Spp1", "Col6a1", "Col1a1", "Mmp2")
    ),

    # Collagen
    col = list(
      name = "Collagen",
      genes = c(
        "Col10a1", "Col11a1", "Col11a2", "Col12a1", "Col13a1", "Col14a1",
        "Col15a1", "Col16a1", "Col17a1", "Col18a1", "Col19a1", "Col1a1",
        "Col1a2", "Col20a1", "Col22a1", "Col23a1", "Col24a1", "Col25a1",
        "Col26a1", "Col27a1", "Col28a1", "Col2a1", "Col3a1", "Col4a1",
        "Col4a2", "Col4a3", "Col4a3bp", "Col4a4", "Col4a5", "Col4a6",
        "Col5a1", "Col5a2", "Col5a3", "Col6a1", "Col6a2", "Col6a3",
        "Col6a4", "Col6a5", "Col6a6", "Col7a1", "Col8a1", "Col8a2",
        "Col9a1", "Col9a2", "Col9a3", "Colec10", "Colec11", "Colec12",
        "Colgalt1", "Colgalt2", "Colq"
      )
      # Obtained using
      # sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^Col")] } %>% sort()
    ),

    # Mitochondrial
    mt = list(
      name = "Mitochondrial",
      genes = c(
        "mt-Atp6", "mt-Atp8", "mt-Co1", "mt-Co2", "mt-Co3", "mt-Cytb",
        "mt-Nd1", "mt-Nd2", "mt-Nd3", "mt-Nd4", "mt-Nd4l", "mt-Nd5", "mt-Nd6"
      )
    ),
    # Obtained using (e.g.)
    # sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^mt-")] } %>% sort()

    air = list(
      name = "Aerobic respiration",
      genes = c(
        # GO:1903715 (regulation of aerobic respiration)
        # http://www.informatics.jax.org/vocab/gene_ontology/GO:1903715
        c("Actn3", "Akt1", "Bnip3", "Cbfa2t3", "Hif1a", "Ide", "Nop53", "Shmt2", "Trpv4", "Vcp"),

        # GO:0009060 (aerobic respiration) minus GO:1903715 and minus 'mt-Co3'
        # http://www.informatics.jax.org/vocab/gene_ontology/GO:0009060
        c("Aco1", "Aco2", "Adsl", "Afg1l", "Atp5f1d", "Bloc1s1", "Cat", "Cox10", "Cox4i1", "Cox4i2", "Cox5a", "Cox5b", "Cox6a1", "Cox6a2", "Cox7c", "Cs", "Csl", "Cycs", "Cyct", "Dhtkd1", "Dlat", "Dlst", "Fh", "Fxn", "Idh1", "Idh2", "Idh3a", "Idh3b", "Idh3g", "Ireb2", "Mdh1", "Mdh1b", "Mdh2", "Mtco1", "Mtfr1", "Mtfr1l", "Mtfr2", "Mtnd1", "Mtnd4", "Mup1", "Mup11", "Mup2", "Mup3", "Mup4", "Mup5", "Mup6", "Ndufs4", "Ndufs7", "Ndufs8", "Ogdh", "Ogdhl", "Oxa1l", "Pank2", "Pdha1", "Pdha2", "Pdhb", "Proca1", "Q8BPC6", "Sdha", "Sdhaf2", "Sdhb", "Sdhc", "Sdhd", "Sirt3", "Sucla2", "Suclg1", "Suclg2", "Ucn", "Uqcr10", "Uqcrb", "Uqcrh")
      )
    ),
    fib_mig = list(
      name = "Reg. fib. mig.",
      genes = c(
        # GO:0010762 (regulation of fibroblast migration)
        # http://www.informatics.jax.org/vocab/gene_ontology/GO:0010762
        c("Acta2", "Actr3", "Adipor2", "Ager", "Akap12", "Akt1", "Appl1", "Appl2", "Arhgap4", "Bag4", "Braf", "Cln3", "Coro1c", "Cygb", "Ddr2", "Dmtn", "Fer", "Fgf2", "Fgfr1", "Gna12", "Has1", "Hyal2", "Itgb1", "Itgb1bp1", "Itgb3", "MACIR", "Mmp1a", "Mta2", "Pak1", "Pak3", "Prkce", "Prr5l", "Ptk2", "Rac1", "Rcc2", "Rffl", "Sdc4", "Slc8a1", "Tgfb1", "Thbs1", "Tsc2", "Uts2", "Wdpcp")
      )
    ),
    glycolysis = list(
      name = "Anaerobic glycolysis",
      genes = c(
        # GO:0006096 (glycolytic process / anaerobic glycolysis)
        # http://www.informatics.jax.org/vocab/gene_ontology/GO:0006096
        c("Actn3", "Adpgk", "Aldoa", "Aldoart1", "Aldoart2", "Aldob", "Aldoc", "App", "Bpgm", "Cbfa2t3", "Ddit4", "Dhtkd1", "Eif6", "Eno1", "Eno2", "Eno3", "Eno4", "Entpd5", "Ep300", "Esrrb", "Fbp1", "Foxk1", "Foxk2", "Gale", "Galk1", "Galt", "Gapdh", "Gapdhs", "Gck", "Git1", "Gm10358", "Gm11214", "Gm12117", "Gm15294", "Gm3839", "Gpd1", "Gpi", "Hdac4", "Hif1a", "Hk1", "Hk2", "Hk3", "Hkdc1", "Htr2a", "Ier3", "Ifng", "Igf1", "Il3", "Ins2", "Insr", "Jmjd8", "Khk", "Mif", "Mlxipl", "Mpi", "Mtch2", "Myc", "Myog", "Ncor1", "Nupr1", "Ogdh", "Ogt", "P2rx7", "Pfkfb2", "Pfkl", "Pfkm", "Pfkp", "Pgam1", "Pgam2", "Pgk1", "Pgk2", "Pklr", "Pkm", "Ppara", "Ppargc1a", "Prkaa1", "Prkaa2", "Prkag2", "Prkag3", "Prxl2c", "Psen1", "Sirt6", "Slc2a6", "Slc4a1", "Slc4a4", "Stat3", "Tigar", "Tkfc", "Tpi1", "Trex1", "Zbtb20", "Zbtb7a")
      )
    )
  )
Collector of genes from the genesets of interest
# Use a function because the list may change
get_goi_genes <- function() {
  goi %>%
    lapply(as.data.frame) %>%
    dual(base::rbind) %>%
    pull(genes) %>%
    unique()
}
get_goi_genes()[1:7] %>%
  c("...") %>%
  t() %>%
  kable()
Fos Fosb Junb Ascc1 Chuk Jun Mapk14
Inferring gene set from symbol
# Return a vector `goi_set` indicating the gene set for each `symbol`
symbol2goiset <- function(genes) {
  df <- data.frame(symbol = genes, goi_set = "Other")

  for (goi in goi) {
    df %<>% mutate(goi_set = if_else(symbol %in% goi$genes, goi$name, goi_set))
  }

  df %>% pull(goi_set, symbol)
}
Function to show gene info
show_gene_info_table <- function(df) {
  df$symbols # need this column

  df %>%
    mutate(role = symbol2role(symbol)) %>%
    mutate(goi_set = symbol2goiset(symbol)) %>%
    mutate(gene_id = symbol2geneid(symbol)) %>%
    merge(gene.summary$mm, all.x = TRUE, by = "gene_id", sort = FALSE) %>%
    mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
    mutate(description = paste0("<span style='font-size:70%'>", description, "</span>")) %>%
    select(-summary) %>%
    mutate(symbol = as.link(symbol)) %>%
    DT::datatable(rownames = NULL, escape = FALSE, width = "100%")
}
get_goi_genes() %>%
  sample(size = 3) %>%
  data.frame(symbol = .) %>%
  rbind("...") %>%
  show_gene_info_table()
Plot from PCA
# TODO: normalize with edgeR?
# Plots the PCA, showing the expression of genes
show_goi_pca <- function(sce, genes, group.by = "celltype", colors = ramp(as.character(sce@colData[[group.by]]))) {
  cbind(
    # Reduced dim coordinates
    sce@int_colData$reducedDims$PCA %>%
      as.data.frame(row.names = colnames(sce)) %>%
      select(x = PC1, y = PC2),
    # Metadata
    sce@colData %>%
      as.data.frame(row.names = colnames(sce)) %>%
      select(celltype = all_of(group.by)),
    # Expression data for genes-of-interest
    sce@assays@data$counts %>%
      log1p() %>%
      `[`(genes[genes %in% rownames(.)], ) %>%
      t()
  ) %>%
    # Make long table
    reshape2::melt(
      # Keep as separate columns:
      id.vars = c("x", "y", "celltype"),
      # These columns will be stacked, in a column with this name:
      measure.vars = genes, variable.name = "symbol"
    ) %>%
    # Plot
    ggplot(aes(x = x, y = y, color = celltype)) +
    geom_point(aes(size = value), alpha = 0.4) +
    scale_color_manual(values = colors, breaks = gglast(colour)) + 
    # theme_void() +
    theme(
      axis.title = element_blank(),
      axis.text = element_blank(),
    ) +
    labs(color = group.by, size = "Expression") +
    facet_wrap(vars(symbol))
}
Example
get_goi_genes() %>%
  mock_sce(genes = ., ncells = 77) %>%
  scater::logNormCounts() %>%
  scater::runPCA() %>%
  show_goi_pca(goi$pvf$genes, group.by = "Cell_Cycle", colors = list(`G0` = "Red", `G1` = "Blue", `G2M` = "Orange", `S` = "DarkGreen", `Dead` = "Black"))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.

Aggregate by sample group

# Aggregates genes by `group.by` for each geneset from `goi`
goi_agg_by_group <- function(sce, group.by = "celltype") {
  goi %>%
    lapply(function(goi) {
      (dual(rbind))(as.list(group.by) %>% lapply(function(group.by) {
        if (any(goi$genes %in% rownames(sce))) {
          cbind(
            # Metadata
            sce@colData %>%
              as.data.frame(row.names = colnames(sce)) %>%
              select(subgroup = all_of(group.by)),
            # Expression data for genes-of-interest
            sce %>%
              scater::logNormCounts() %>%
              SingleCellExperiment::logcounts() %>%
              take_rows(goi$genes) %>%
              t()
          ) %>%
            # Aggregate by sample
            group_by(subgroup) %>%
            summarise(across(everything(), mean)) %>%
            # Make long table
            reshape2::melt(
              # Keep as separate columns:
              id.vars = c("subgroup"),
              # Other columns will be stacked, in a column with this name:
              variable.name = "symbol"
              # A column `value` contains their values
            ) %>%
            mutate(group = group.by) %>%
            mutate(goi_set = goi$name)
        }
      }))
    }) %>%
    # Relabel the elements of the list
    dual(rbind) %>%
    split(.$goi_set)

  # cf.
  # https://stackoverflow.com/questions/33775239/emulate-split-with-dplyr-group-by-return-a-list-of-data-frames
}
Example
get_goi_genes() %>%
  mock_sce(genes = ., ncells = 77) %>%
  goi_agg_by_group(group.by = "Treatment") %>%
  lapply(head)
## $`Aerobic respiration`
##       subgroup symbol    value     group             goi_set
## air.1   treat1  Actn3 1.904347 Treatment Aerobic respiration
## air.2   treat2  Actn3 2.312375 Treatment Aerobic respiration
## air.3   treat1   Akt1 7.215821 Treatment Aerobic respiration
## air.4   treat2   Akt1 7.179238 Treatment Aerobic respiration
## air.5   treat1  Bnip3 5.517770 Treatment Aerobic respiration
## air.6   treat2  Bnip3 5.843600 Treatment Aerobic respiration
## 
## $`Anaerobic glycolysis`
##              subgroup symbol    value     group              goi_set
## glycolysis.1   treat1  Actn3 1.904347 Treatment Anaerobic glycolysis
## glycolysis.2   treat2  Actn3 2.312375 Treatment Anaerobic glycolysis
## glycolysis.3   treat1  Adpgk 2.892426 Treatment Anaerobic glycolysis
## glycolysis.4   treat2  Adpgk 2.847102 Treatment Anaerobic glycolysis
## glycolysis.5   treat1  Aldoa 3.626778 Treatment Anaerobic glycolysis
## glycolysis.6   treat2  Aldoa 4.233222 Treatment Anaerobic glycolysis
## 
## $Collagen
##       subgroup  symbol    value     group  goi_set
## col.1   treat1 Col10a1 6.798632 Treatment Collagen
## col.2   treat2 Col10a1 4.794314 Treatment Collagen
## col.3   treat1 Col11a1 4.770213 Treatment Collagen
## col.4   treat2 Col11a1 5.417654 Treatment Collagen
## col.5   treat1 Col11a2 9.131331 Treatment Collagen
## col.6   treat2 Col11a2 9.142413 Treatment Collagen
## 
## $Mitochondrial
##      subgroup  symbol    value     group       goi_set
## mt.1   treat1 mt-Atp6 9.198091 Treatment Mitochondrial
## mt.2   treat2 mt-Atp6 8.463647 Treatment Mitochondrial
## mt.3   treat1 mt-Atp8 8.300985 Treatment Mitochondrial
## mt.4   treat2 mt-Atp8 8.090240 Treatment Mitochondrial
## mt.5   treat1  mt-Co1 4.740320 Treatment Mitochondrial
## mt.6   treat2  mt-Co1 4.702137 Treatment Mitochondrial
## 
## $Neuroinflammation
##                     subgroup symbol    value     group           goi_set
## neuroinflammation.1   treat1    Fos 9.214055 Treatment Neuroinflammation
## neuroinflammation.2   treat2    Fos 8.945624 Treatment Neuroinflammation
## neuroinflammation.3   treat1   Fosb 9.204561 Treatment Neuroinflammation
## neuroinflammation.4   treat2   Fosb 9.065280 Treatment Neuroinflammation
## neuroinflammation.5   treat1   Junb 3.195598 Treatment Neuroinflammation
## neuroinflammation.6   treat2   Junb 2.399009 Treatment Neuroinflammation
## 
## $PVF
##       subgroup symbol    value     group goi_set
## pvf.1   treat1   Spp1 6.472082 Treatment     PVF
## pvf.2   treat2   Spp1 7.130099 Treatment     PVF
## pvf.3   treat1 Col6a1 7.754018 Treatment     PVF
## pvf.4   treat2 Col6a1 7.328395 Treatment     PVF
## pvf.5   treat1 Col1a1 5.324151 Treatment     PVF
## pvf.6   treat2 Col1a1 5.688841 Treatment     PVF
## 
## $`Reg. fib. mig.`
##           subgroup  symbol    value     group        goi_set
## fib_mig.1   treat1   Acta2 9.492388 Treatment Reg. fib. mig.
## fib_mig.2   treat2   Acta2 9.392135 Treatment Reg. fib. mig.
## fib_mig.3   treat1   Actr3 2.322423 Treatment Reg. fib. mig.
## fib_mig.4   treat2   Actr3 2.649601 Treatment Reg. fib. mig.
## fib_mig.5   treat1 Adipor2 1.427232 Treatment Reg. fib. mig.
## fib_mig.6   treat2 Adipor2 1.139268 Treatment Reg. fib. mig.
Aggregate by gene set
# For each cell, aggregate each gene set
# Returns a sample x geneset table (with additional metadata columns)
goi_agg_by_genes <- function(sce) {
  cbind(
    # Sample metadata
    sce@colData %>%
      as.data.frame() %>%
      select(any_of(c("cluster", "celltype", "group", "subgroup", "Group"))),

    # Mean expression for genes-of-interest
    lapply(goi, function(goi) {
      list() %>% within({
        assign(
          goi$name,
          sce@assays@data$counts %>%
            log1p() %>%
            take_rows(goi$genes) %>%
            colMeans()
        )
      })
    }),

    # Library size
    data.frame(lib.size = sce@assays@data$counts %>% colSums()),

    # Reduced dim coordinates
    sce %>%
      scater::logNormCounts() %>%
      scater::runPCA() %>%
      SingleCellExperiment::reducedDim("PCA") %>%
      as.data.frame() %>%
      select(x = PC1, y = PC2, z = PC3)
  )
}
Example
get_goi_genes() %>%
  mock_sce(genes = ., ncells = 77) %>%
  goi_agg_by_genes() %>%
  head() %>%
  DT::datatable(width = "100%", options = list(scrollX = TRUE))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.

Kidney fibroblasts

This is a set of differential genes in kidney fibroblast activation from Deng et al., 2021.

Load
fb_genes_si7 <- path_to("*/Deng-Wei-2021/*/SI7.*.gz") %>% from_tsv()
# fb_genes_si8 <- path_to("*/Deng-Wei-2021/*/SI8.*.gz") %>% from_tsv()
show_tbl <- function(tbl) {
  tbl %<>%
    ind2col("symbol") %>%
    mutate(symbol = as.link(symbol)) %>%
    select(symbol, logFC)

  tbl %>%
    DT::datatable(escape = FALSE) %>%
    DT::formatSignif(colnames(tbl %>% select(-symbol)), digits = 3)
}
SI7

The DE table from SI7 extracted from GSE121190.

fb_genes_si7 %>%
  filter(adj.P.Val < 1e-2) %>%
  show_tbl()

Daneman, 2021

The list of genes is from Dorrier-Aran-…-Daneman, 2021, Extended Data Fig. 4.

Add clusters to genes of interest
if (!("c0" %in% names(goi))) {
  goi %<>%
    c(
      list(
        `0` = c("Col1a2", "Bgn", "Col3a1", "Col8a1", "Igf1", "Ifitm1", "Col1a1", "Cfh", "Sparc", "B2m"),
        `1` = c("Apod", "Trf", "Dcn", "Gsn", "Gpc3", "Tgfbi", "Ccl11", "Smoc2", "Lum", "Cst3"),
        `2` = c("Lgals1", "Timp1", "Rpl41", "Rps18", "Rps12", "Col4a1", "Cxcl10"),
        `3` = c("Junb", "Fosb", "Fos", "Ptn", "Igfbp2", "Klf4", "Clec3b", "Apoe", "Rbp4", "Vtn"),
        `4` = c("Rbp1", "Fth1", "Aldh1a2", "Fn1", "Igfbp3", "Slc7a11", "Ptgds", "Timp3", "Igfbp5"),
        `5` = c("Ptch1", "Igfbp6", "Tmsb4x", "S100a6", "Igfbp7", "Mgp", "Saa1"),
        `6` = c("Itih5", "Spp1", "Klf2", "Ubb", "Jund", "Mt1"),
        `7` = c("Rgs5", "Acta2", "Myl9", "Gm13889", "Tagln", "Tpm2", "Actb")
      ) %>%
        stack() %>%
        mutate(cluster = paste0("c", ind), name = paste0("Dm #", ind)) %>%
        split(.$cluster) %>%
        lapply(function(c) list(name = unique(c$name), genes = c$values))
    )
}
Visualizing in heatmap
heatmap <- function(sce, colors = NULL, anno_col = NULL, anno_row = NULL, ...) {
  # `colors` should be like list(`negative` = "Blue", `positive` = "Red", ...)
  colors %<>% c(NULL) %>% unlist()

  if (!is.null(anno_row)) {
    stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
  }

  if (!is.null(anno_col)) {
    stopifnot(assertthat::are_equal(colnames(sce), rownames(anno_col)))
  }

  anno_col %<>%
    list(
      sce@colData %>%
        as.data.frame() %>%
        select(any_of(c("celltype", "subgroup", "Group", "cluster", "Treatment")))
    ) %>%
    dual(cbind)

  # print(anno_col %>% as.list() %>% lapply(\(x) if (all(x %in% names(colors))) colors[unique(as.character(x))] else ramp(x)))

  sce %>%
    SingleCellExperiment::counts() %>%
    log1p() %>%
    {
      # (.) - rowMeans(.)
      # (.) / rowSums(.)
    } %>%
    pheatmap::pheatmap(
      show_colnames = FALSE,
      treeheight_row = 0,
      treeheight_col = 0,
      cluster_rows = FALSE,
      fontsize = 6,
      color = grDevices::colorRampPalette(c("black", "yellow"))(10),
      annotation_col = anno_col,
      annotation_row = anno_row,
      annotation_colors = c(
        anno_row %>% as.list() %>% lapply(ramp),
        anno_col %>% as.list() %>% lapply(\(x) if (all(x %in% names(colors))) colors[unique(as.character(x))] %>% unlist() else ramp(x))
      ),
      ...
    )
}
show_dm_heatmap <- function(sce, anno_row = NULL, ...) {
  dm_genes <-
    goi[grep("c[0-9]", goi %>% names())] %>%
    lapply(as.data.frame) %>%
    dual(rbind) %>%
    pull(name, genes)

  if (!is.null(anno_row)) {
    stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
  }

  anno_row %<>%
    take_rows(names(dm_genes))

  sce %<>%
    take_rows(names(dm_genes))

  anno_row %<>%
    list(data.frame(dm.cluster = dm_genes[rownames(sce)])) %>%
    dual(cbind)

  heatmap(sce, anno_row = anno_row, ...)
}
unlist(goi[grep("c[0-9]", goi %>% names())]) %>%
  mock_sce(ncells = 77) %>%
  {
    (.)[, order((.)$celltype)]
  } %>%
  show_dm_heatmap(
    colors = colors,
    cluster_cols = FALSE,
    anno_col = data.frame(x = ((1:ncol(.)) %% 2), row.names = colnames(.)),
    anno_row = data.frame(y = ((1:nrow(.)) %% 2), row.names = rownames(.))
  )

All GoI

The following table shows all the “genes of interest”.

get_goi_genes() %>%
  data.frame(symbol = .) %>%
  show_gene_info_table()

Analysis tools

Dummy dataset

Small dataset to run through quickly.

sce_dummy <- readRDS(path_to("*/*/sce_dummy.rds"))
# Made with:
# load_dataset$dm(size = 77) %>% keep_only_goi() %>% saveRDS(file = "sce_dummy.rds")
## class: SingleCellExperiment 
## dim: 267 74 
## metadata(0):
## assays(1): counts
## rownames(267): Fos Fosb ... Zbtb20 Zbtb7a
## rowData names(0):
## colnames(74): healthy2_AATCCAGCATAGTAAG EAE1_AAAGATGTCGGAAATA ...
##   EAE1_CTGTTTAAGAGTCTGG EAE1_TTAGTTCTCTCTGAGA
## colData names(4): group subgroup cluster celltype
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
data.frame(
  x = sce_dummy@assays@data$counts %>% colSums(),
  subgroup = sce_dummy$subgroup
) %>%
  ggplot(aes(x = x, color = subgroup)) +
  scale_color_manual(values = colors[gglast(colour)]) +
  geom_density()

Scater

Computation of red. dim.
scater_attach <- function(sce) {
  scater::addPerCellQC(
    x = sce,
    subsets = list(Mito = grep("mt-", rownames(sce)))
  ) %>%
    scater::logNormCounts() %>%
    scater::runPCA(name = "PCA", ncomponents = 20) %>%
    scater::runTSNE(perplexity = 10, dimred = "PCA") %>%
    scater::runUMAP()
}
sce_dummy %<>% scater_attach()
## Read the 74 x 20 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 2, perplexity = 10.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.00 seconds (sparsity = 0.595690)!
## Learning embedding...
## Iteration 50: error is 63.296339 (50 iterations in 0.01 seconds)
## Iteration 100: error is 60.037220 (50 iterations in 0.01 seconds)
## Iteration 150: error is 64.801343 (50 iterations in 0.01 seconds)
## Iteration 200: error is 62.466966 (50 iterations in 0.01 seconds)
## Iteration 250: error is 64.566597 (50 iterations in 0.01 seconds)
## Iteration 300: error is 1.927342 (50 iterations in 0.00 seconds)
## Iteration 350: error is 1.369595 (50 iterations in 0.01 seconds)
## Iteration 400: error is 1.140789 (50 iterations in 0.00 seconds)
## Iteration 450: error is 0.978576 (50 iterations in 0.00 seconds)
## Iteration 500: error is 0.799270 (50 iterations in 0.00 seconds)
## Iteration 550: error is 0.704285 (50 iterations in 0.00 seconds)
## Iteration 600: error is 0.672386 (50 iterations in 0.00 seconds)
## Iteration 650: error is 0.660327 (50 iterations in 0.00 seconds)
## Iteration 700: error is 0.643767 (50 iterations in 0.00 seconds)
## Iteration 750: error is 0.638919 (50 iterations in 0.00 seconds)
## Iteration 800: error is 0.638740 (50 iterations in 0.00 seconds)
## Iteration 850: error is 0.640056 (50 iterations in 0.01 seconds)
## Iteration 900: error is 0.641027 (50 iterations in 0.01 seconds)
## Iteration 950: error is 0.641538 (50 iterations in 0.00 seconds)
## Iteration 1000: error is 0.641952 (50 iterations in 0.00 seconds)
## Fitting performed in 0.12 seconds.
## 07:36:29 UMAP embedding parameters a = 1.896 b = 0.8006
## 07:36:29 Read 74 rows and found 267 numeric columns
## 07:36:29 Reducing X column dimension to 50 via PCA
## 07:36:29 PCA: 50 components explained 95.41% variance
## 07:36:29 Using FNN for neighbor search, n_neighbors = 15
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/FNN/libs/FNN.so") ...
## 07:36:30 Commencing smooth kNN distance calibration using 4 threads
## 07:36:31 Initializing from normalized Laplacian + noise
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSpectra/libs/RSpectra.so") ...
## 07:36:31 Commencing optimization for 500 epochs, with 1758 positive edges
## 07:36:31 Optimization finished
sce_dummy %>%
  scater::makePerCellDF() %>%
  head(3) %>%
  DT::datatable(width = "100%")
Plotting (2d)
scater_show <- function(sce, color.by = "celltype", colors = NULL) {
  items <- sce@colData[[color.by]]
  
  colors <- 
    if (all(items %in% names(colors))) colors[items] else ramp(items, palette = "Set 2")

  sce %>%
    {
      function(which_dimred) {
        suppressMessages(
          scater::plotReducedDim(
            dimred = which_dimred,
            object = (.),
            colour_by = color.by
          ) +
            scale_color_manual(values = colors) +  # no need for `breaks`
            labs(color = color.by) +
            guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))) +
            theme(
              axis.text = element_blank(),
              axis.ticks = element_blank()
            )
        )
      }
    } %>%
    {
      gridExtra::arrangeGrob((.)("PCA"), (.)("TSNE"), (.)("UMAP"), nrow = 3)
    }
}
sce_dummy %>%
  scater_show(color.by = "subgroup", colors = colors) %>%
  gridExtra::grid.arrange()

Plotting (3d)

Template, not for direct use.

pca3d_template <- function(sce, color.by = "celltype", colors = ramp(sce@colData[[color.by]])) {
  data.frame(
    color_group = sce@colData[[color.by]],
    lib.size = sce@assays@data$counts %>% colSums(),
    SingleCellExperiment::reducedDim(sce, "PCA") %>%
      as.data.frame() %>%
      select(PC1, PC2, PC3)
  ) %>%
    group_by(color_group) %>%
    sample(size = 30, replace = TRUE) %>%
    ungroup() %>%
    # print() %>%

    plotly::plot_ly() %>%
    plotly::add_markers(
      x = ~PC1, y = ~PC2, z = ~PC3,
      marker = list(size = 2, opacity = 1),
      color = ~color_group,
      colors = colors
    ) %>%
    plotly::layout(
      scene = list(
        xaxis = list(title = "PC1", showticklabels = FALSE),
        yaxis = list(title = "PC2", showticklabels = FALSE),
        zaxis = list(title = "PC3", showticklabels = FALSE)
      )
    )
}
sce_dummy %>% pca3d_template(color.by = "subgroup", colors = colors)

Cell cycle

Cell cycle genes
default_cyclic_genes <-
  # Convert the "cyclic genes" of `Revelio` from Hs to Mm
  Revelio::revelioTestData_cyclicGenes %>%
  lapply(function(genes) {
    merge(
      x = data.frame(symbol.x = genes),
      y = homology(taxid$hs, taxid$mm),
      by = "symbol.x",
      all.x = TRUE
    ) %>%
      pull(symbol.y) %>%
      sort(na.last = TRUE)
  })
default_cyclic_genes %>%
  as.data.frame() %>%
  head() %>%
  rbind("...") %>%
  kable()
G1.S S G2 G2.M M.G1
Abca7 a 1700001L19Rik Adh4 1700102P08Rik
Acd Abcc2 1700066M21Rik Ahi1 2700049A03Rik
Acyp1 Abcc5 Alkbh1 Akirin2 Afap1
Adamts1 Abhd10 Anln Ankrd40 Agfg1
Adck2 Adam22 Ap3d1 Anln Agpat3
Adcy6 Arhgap42 Arhgap19 Arhgap19 Akap13
Computation of cell cycle phase

The DC1-DC2 plane is supposed to show a circle reflecting the cell cycle (cf. Fig 1 in Schwabe et al., 2020).

cell_cycle <- function(sce, batch.by = NA, cyclic_genes = default_cyclic_genes) {
  list(
    counts = sce@assays@data$counts,
    orig.id = colnames(sce),
    batch = if (is.na(batch.by)) "cell" else paste0(sce@colData[[batch.by]], "_")
  ) %>%
    within({
      colnames(counts) <- paste0(batch, 1:ncol(counts))
      orig.id <- data.frame(old = orig.id, new = colnames(counts)) %>% pull(old, new)
    }) %>%
    with({
      counts %>%
        Revelio::createRevelioObject(cyclicGenes = cyclic_genes, lowernGeneCutoff = 0) %>%
        Revelio::getCellCyclePhaseAssignInformation() %>%
        Revelio::getPCAData() %>%
        Revelio::getOptimalRotation() %>%
        list(cyclic = .) %>%
        with({
          assertthat::assert_that(all(
            rownames(cyclic@cellInfo) == colnames(cyclic@transformedData$pca$data)
          ))

          cbind(
            cyclic@transformedData$pca$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
            cyclic@transformedData$dc$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
            cyclic@cellInfo
          )
        }) %>%
        data.frame(row.names = orig.id[rownames(.)])
    }) %>%
    cbind(as.data.frame(sce@colData)[rownames(.), , drop = FALSE])
}
2d plotting of cell cycle “trajectory”
cell_cycle_plot2d <- function(cco, colors = ramp(cco$ccPhase)) {
  cco %>%
    ggplot(aes(x = DC1, y = DC2, color = ccPhase)) +
    geom_point(size = 3, stroke = 1) +
    scale_color_manual(values = colors, breaks = gglast(colour))
}
Test on Revelio data
cco_revelio <-
  cell_cycle(
    Revelio::revelioTestData_rawDataMatrix %>%
      make_sce(
        counts = .,
        coldata = data.frame(
          x = colnames(.) %>% strsplit("_") %>% dual(rbind),
          row.names = colnames(.)
        ) %>% select(batch = x.1)
      ),
    batch.by = "batch",
    cyclic_genes = Revelio::revelioTestData_cyclicGenes
  )
## 2021-06-28 21:27:59: calculating optimal rotation: 2021-06-28 21:27:59: calculating PCA: 2021-06-28 21:27:59: assigning cell cycle phases: 2021-06-28 21:27:59: reading data: 4.8secs
## 27.48secs
## 50.82secs
## 1.23mins
cco_revelio %>%
  ggplot(aes(x = DC1, y = DC2, color = ccPhase, shape = batch)) +
  geom_point(size = 3, stroke = 1) +
  scale_color_manual(values = colors, breaks = gglast(colour))

Test on mock data
cco_mock <-
  unlist(as.list(default_cyclic_genes)) %>%
  {
    (.)[!is.na(.)]
  } %>%
  mock_sce(ncells = 33) %>%
  cell_cycle()
## 2021-06-28 21:29:14: calculating optimal rotation: 2021-06-28 21:29:14: calculating PCA: 2021-06-28 21:29:14: assigning cell cycle phases: 2021-06-28 21:29:14: reading data: 0.01secs
## 0.36secs
## 0.41secs
## 43.04secs
cco_mock %>%
  ggplot(aes(x = DC1, y = DC2, color = celltype, fill = ccPhase)) +
  geom_point(shape = 21, size = 5) +
  ggplot2::scale_color_manual(values = colors, breaks = gglast(colour)) +
  ggplot2::scale_fill_manual(values = colors, breaks = gglast(fill))

3d plotting of cell cycle “trajectory” with plotly
cell_cycle_plot3d <- function(cco, colors = ramp(cco$ccPhase)) {
  cco %>%
    with({
      plotly::plot_ly() %>%
        plotly::add_markers(
          x = DC1, y = DC2, z = DC3,
          size = 4,
          color = ccPhase,
          colors = colors
        )
    })
}
Test 3d plot
cco_mock %>% cell_cycle_plot3d()
Alternative 3d plotting with rgl
cco_mock %>%
  with({
    rgl::plot3d(DC1, DC2, DC3, size = 3, type = "s", col = ramp(ccPhase)[ccPhase])
    rgl::rglwidget()
  })

DE

Computation of DE genes
deseq <- function(sce, design) {
  sce %>%
    DESeq2::DESeqDataSet(design = design) %>%
    DESeq2::estimateSizeFactors(type = "poscounts") %>%
    DESeq2::DESeq(quiet = TRUE, fitType = "local") %>%
    DESeq2::results() %>%
    as.data.frame() %>%
    select(base = baseMean, lfc = log2FoldChange, p = pvalue) %>%
    mutate(base = log1p(base)) %>%
    tidyr::drop_na() %>%
    mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
    ind2col("symbol") %>%
    list(table = ., design = design)
}
edger <- function(sce, design) {
  sce %>%
    edgeR::estimateDisp(robust = TRUE) %>%
    # edgeR::calcNormFactors(method = "TMM") %>%
    # edgeR::estimateGLMTagwiseDisp(design = design) %>%
    edgeR::glmFit(design = model.matrix(design, data = sce@colData)) %>%
    edgeR::glmLRT(coef = 2) %>%
    `$`(table) %>%
    select(base = logCPM, lfc = logFC, p = PValue, LR = LR) %>%
    tidyr::drop_na() %>%
    mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
    ind2col("symbol") %>%
    list(table = ., design = design)
}
# Hacky fctn to determine colors for `lfc > 0` and `lfc < 0`
lfc_colors <- function(sce, design, colors) {
  f <- terms(design) %>% attr("factors")
  stopifnot((nrow(f) == 1) && (ncol(f) == 1))

  f <- sce@colData[[f %>% colnames()]]

  cc <- colors[f %>% levels()]
  cc <- c(cc[[1]], (rowMeans(col2rgb(cc[-1])) / 255) %>% as.list() %>% dual(rgb))

  (. %>% (colorRamp(cc)) %>% RGB())
}
compute_de <- function(sce, design) {
  list(
    deseq = suppressMessages(deseq(sce, design)),
    edger = suppressMessages(edger(sce, design))
  ) %>%
    with({
      list(
        table = rbind(
          deseq$table %>%
            select(symbol, base, lfc, p) %>%
            mutate(src = "DESeq2"),
          edger$table %>%
            select(symbol, base, lfc, p) %>%
            mutate(src = "edgeR")
        ),
        design = design
      )
    }) %>%
    within({
      # Attach LFC color (not very important)
      table %<>%
        group_by(src) %>%
        mutate(color = (lfc_colors(sce, design, colors))(percent_rank(lfc))) %>%
        ungroup()
    })
}
Example:
sce_dummy.de <- sce_dummy %>% compute_de(~group)
sce_dummy.de$table %>%
  head() %>%
  rbind("...") %>%
  kable()
symbol base lfc p src color
Fos 1.94433786752507 1.03952791369199 0.0350644319817966 DESeq2 #C98144FF
Fosb 1.32188433643544 0.392699195334891 0.94357785439112 DESeq2 #A09B36FF
Junb 1.71655787188708 0.618371363877933 0.403770457436529 DESeq2 #B0913BFF
Spp1 0.685515786871741 1.75726212594778 0.121724809299487 DESeq2 #E4704DFF
Col6a1 0.644138066638724 0.767376344078733 0.324044869548652 DESeq2 #B68D3DFF
Col1a1 2.86899094589606 1.87748518409641 1.75462356248164e-09 DESeq2 #E86E4EFF
Pretty-printing
show_de_table <- function(.) {
  (.) %>%
    group_by(src) %>%
    slice_min(p, n = 777) %>%
    slice_max(abs(lfc), n = 777) %>%
    ungroup() %>%
    merge(
      y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
      all.x = TRUE,
      by = "symbol"
    ) %>%
    merge(
      y = gene.summary$mm %>% select(gene_id, description, summary),
      all.x = TRUE,
      by = "gene_id"
    ) %>%
    select(-any_of("gene_id")) %>%
    select(-any_of("color")) %>%
    # Role: TF, Rec, Lig, etc
    mutate(role = symbol2role(symbol)) %>%
    mutate(symbol = as.link(symbol)) %>%
    mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
    select(-summary) %>%
    mutate(description = paste0("<span style='font-size: 70%;'>", description, "</span>")) %>%
    DT::datatable(escape = FALSE) %>%
    DT::formatSignif(c("base", "lfc", "p"), digits = 3)
}
Example
sce_dummy.de$table %>%
  group_by(src) %>%
  slice_min(p, n = 33) %>%
  ungroup() %>%
  show_de_table()
As heatmap
show_de_heatmap <- function(sce, de_table, ...) {
  top <- de_table %>%
    group_by(lfc > 0) %>%
    slice_min(p, n = 33, with_ties = FALSE) %>%
    ungroup() %>%
    arrange(lfc) %>%
    select(symbol, lfc, p) %>%
    mutate(`-log10(p)` = -log10(p)) %>%
    by_col1() %>%
    `[`(rownames(.) %in% rownames(sce), )

  sce[rownames(top), ] %>%
    heatmap(anno_row = top, ...)
}
Example
sce_dummy %>%
  show_de_heatmap(sce_dummy.de$table %>% filter(src == "edgeR"), colors = colors)

Volcano plot
show_de_volcano <- function(de_table) {
  de_table %<>%

    mutate(goi_set = symbol2goiset(symbol)) %>%
    group_by(src) %>%
    arrange(lfc) %>%
    mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
    ungroup() %>%
    mutate(label = ifelse(is_out, symbol, ""))

  ggplot(de_table, aes(x = lfc, y = -log10(p))) +

    # All genes
    geom_point(color = "gray") +

    # Color by geneset
    geom_point(
      data = de_table %>% filter(goi_set != "Other"),
      aes(x = lfc, y = -log10(p), color = goi_set)
    ) +
    scale_color_manual(values = ramp(de_table$goi_set)) +
    labs(color = "Gene set") +

    # Label outliers
    geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 4), size = 3, color = "blue") +
    facet_grid(. ~ src, scale = "free") +
    theme(legend.position = "bottom")
}
Example
sce_dummy.de$table %>% show_de_volcano()

Expression vs LFC plot
show_de_basecamp <- function(.) {
  (.) %>%
    mutate(goi_set = symbol2goiset(symbol)) %>%
    group_by(goi_set) %>%
    arrange(lfc) %>%
    mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
    mutate(is_out = is_out | (min(tail(sort(base), 5)) <= base)) %>%
    ungroup() %>%
    mutate(label = ifelse(is_out, symbol, "")) %>%
    ggplot(aes(x = lfc, y = base)) +
    geom_point(data = (.), alpha = 0.5, size = 1, color = "grey") +
    geom_point(color = "black", size = 0.5) +
    scale_y_log10() +
    geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 0.01), size = 2, color = "blue") +
    facet_wrap(~goi_set, scale = "free_y") +

    # Note: put labs(x = "..") outside
    labs(y = "Expression") +
    theme(
      legend.position = "bottom",
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
    )
}
Example
sce_dummy.de$table %>%
  show_de_basecamp() +
  labs(x = "Condition1 <-- LFC --> Condition2")

Comparing LFC of DESeq2 vs edgeR
show_de_cmp_lfc <- function(table) {
  merge(
    table %>% filter(src == "DESeq2"),
    table %>% filter(src == "edgeR"),
    by = "symbol",
    all = TRUE
  ) %>%
    # tidyr::replace_na(list(p.x = 0, p.y = 0)) %>%
    tidyr::drop_na() %>%
    mutate(sig.x = (p.x < quantile(p.x, 0.01))) %>%
    mutate(sig.y = (p.y < quantile(p.y, 0.01))) %>%
    mutate(sig = if_else(sig.x & sig.y, "Intersection", if_else(sig.x, "DESeq", if_else(sig.y, "edgeR", "")))) %>%
    mutate(label = if_else(sig.x | sig.y, symbol, "")) %>%
    {
      ggplot((.) %>% filter(sig != ""), aes(x = lfc.x, y = lfc.y)) +
        geom_point(data = (.) %>% filter(sig == ""), stroke = 0, alpha = 0.2) +
        geom_point(aes(color = sig)) +
        labs(x = "LFC by DESeq2", y = "LFC by edgeR", color = "Top %1 signif.") +
        geom_text(aes(label = label, color = sig), hjust = -0.3, position = position_jitter(h = 0.05), size = 3, alpha = 0.8, show.legend = FALSE) +
        geom_abline(slope = 1, intercept = 0, linetype = "dashed")
    }
}
Example
sce_dummy.de$table %>% show_de_cmp_lfc()

GO

Computation
compute_go <- function(de_table, n = 33) {
  de_table %>%
    mutate(direction = if_else(lfc > 0, "up", "dn")) %>%
    group_by(direction) %>%
    slice_min(p, n = n, with_ties = TRUE) %>%
    slice_max(abs(lfc), n = n, with_ties = FALSE) %>%
    split(.$direction) %>%
    within({
      f <- (function(x) x$symbol[1:5] %>% paste(collapse = ", "))
      message(paste("Up:", f(up), "..."))
      message(paste("Dn:", f(dn), "..."))
      rm(f)
    }) %>%
    lapply(function(de_table) {
      de_table %>%
        # Get the Entrez IDs of our genes
        mutate(gene_id = symbol2geneid(symbol)) %>%
        pull(gene_id) %>%
        # GO enrichment analysis
        limma::goana(species = "Mm") %>%
        ind2col("category") %>%
        rename(p = P.DE, `#DE` = DE, `#cat` = N, GO = Ont, term = Term) %>%
        # Limit selection
        filter(p <= 0.05) %>%
        arrange(p)
    })
}
Pretty-printing
show_go_table <- function(.) {
  (.) %>%
    with({
      rbind(
        up %>% mutate(direction = "lfc_up"),
        dn %>% mutate(direction = "lfc_down")
      ) %>%
        arrange(p)
    }) %>%
    mutate(category = if_else(grepl("^GO:", category), as.link(category), category)) %>%
    mutate(category = paste0("<span style='white-space:nowrap;'>", category, "(", GO, ")", "</span>")) %>%
    mutate(direction = if_else(grepl("lfc_u", direction), paste0("<span style='color:darkred'>", direction, "</span>"), direction)) %>%
    mutate(direction = if_else(grepl("lfc_d", direction), paste0("<span style='color:darkgreen'>", direction, "</span>"), direction)) %>%
    dplyr::select(any_of(c("category", "direction", "p", "#DE", "#cat", "term"))) %>%
    DT::datatable(escape = FALSE, width = "100%") %>%
    DT::formatSignif("p", digits = 3)
}
Example
sce_dummy.go <-
  sce_dummy.de$table %>%
  compute_go()
## Up: Col4a2, Col4a1, Ier3, Col18a1, Col8a1 ...
## Dn: Col9a2, Pdhb, Col9a2, Ddit4, Col4a6 ...
rbind(
  sce_dummy.go$up %>% head(2),
  sce_dummy.go$dn %>% head(2),
  "..."
) %>%
  DT::datatable(escape = FALSE, width = "100%")
sce_dummy.go %>% show_go_table()

Misc

Cosine similarity
# If A and B are SingleCellExperiment-like, then `cosine` computes
# the cosine similarity of samples of A against samples of B
cosine <- function(A, B) {
  A <- A@assays@data$counts
  B <- B@assays@data$counts
  ii <- intersect(rownames(A), rownames(B))
  A <- t(as.matrix(A[ii, , drop = FALSE]))
  B <- t(as.matrix(B[ii, , drop = FALSE]))
  A <- A / sqrt(rowSums(A * A))
  B <- B / sqrt(rowSums(B * B))
  A %*% t(B)
}

Example:

cosine(
  sce_dummy[, sce_dummy$subgroup == "healthy1"],
  sce_dummy[, sce_dummy$subgroup == "healthy1"]
) %>%
  pheatmap::pheatmap()

Co-expression

Hand-made
coex <- function(sce) {
  # Filter for the correlation matrix
  f <- function(.) ((.) != 0)

  sce %>%
    SingleCellExperiment::counts() %>%
    t() %>%
    # stats::cor(method = "spearman") %>%
    Hmisc::rcorr(type = "spearman") %>%
    `$`(r) %>%
    {
      with(
        (upper.tri(.) * (.)) %>% f() %>% which(arr.ind = TRUE) %>% as.data.frame(),
        data.frame(a = rownames(.)[row], b = colnames(.)[col], w = (.)[cbind(row, col)])
      )
    }
}
show_coex_graph <- function(coex_df, de_table, n = 77) {
  coex_df %<>% as.data.frame()

  # Note: the graph vertices are pulled from the de_table
  # Only those occurring in coex_df$a or .$b are retained

  stopifnot(!any(duplicated(de_table$symbol)))

  # Color of vertex labels
  # de_table %>% pull(color, symbol)

  stopifnot(all(c("a", "b", "w") %in% colnames(coex_df)))

  ex_graph <-
    coex_df %>%
    group_by(w > 0) %>%
    mutate(color = ((1 + w / max(abs(w))) / 2)) %>%
    slice_max(abs(w), n = n) %>%
    ungroup() %>%
    mutate(color = colorRamp(c("blue", "white", "red"))(color) %>% RGB(alpha = 0.2)) %>%
    select(a, b, w, color) %>%
    igraph::graph_from_data_frame(
      directed = FALSE,
      vertices = (
        de_table %>%
          filter((symbol %in% coex_df$a) | (symbol %in% coex_df$b)) %>%
          select(symbol, color)
      )
    )

  # Graphs based on OmniPath
  graphs <- list(
    tf = omnipath.tf,
    lr = omnipath.lr,
    ia = omnipath.in %>% 
      anti_join(omnipath.tf, by = c("source", "target")) %>%
      anti_join(omnipath.lr, by = c("source", "target"))
  ) %>%
    lapply(function(df) {
      df %>%
        mutate(sign = is_stimulation - is_inhibition) %>%
        select(a = source_genesymbol, b = target_genesymbol, sign) %>%
        filter(a %in% igraph::V(ex_graph)$name) %>%
        filter(b %in% igraph::V(ex_graph)$name) %>%
        # group_by(a, b) %>% summarize(sign = sum(sign)) %>%
        filter(sign != 0) %>%
        igraph::graph_from_data_frame(directed = TRUE)
    }) %>%
    within({
      ex <- ex_graph
    })

  rm(ex_graph)

  # for layouting
  set.seed(43)

  graph_layout <-
    (
      graphs$ex
        # + igraph::as.undirected(graphs$tf)
        # + igraph::as.undirected(graphs$lr)
        + igraph::as.undirected(graphs$ia)
    ) %>%
    # igraph::layout_with_kk(dim = 2, kkconst = igraph::vcount(.)) %>%
    igraph::layout_with_dh(maxiter = 33) %>%
    # igraph::layout_with_lgl() %>%
    # igraph::layout_with_fr(start.temp = 1e-1) %>%
    as.data.frame() %>%
    magrittr::set_colnames(c("x", "y")) %>%
    magrittr::set_rownames(igraph::V(graphs$ex)$name)

  graphs %>%
    lapply(function(graph) {
      list(graph = graph, pos = graph_layout) %>%
        within({
          # Edges as dataframe
          E <-
            graph %>%
            igraph::get.edgelist() %>%
            magrittr::set_colnames(c("a", "b")) %>%
            as.data.frame()

          # Attach segment data
          segment <-
            cbind(
              take_rows(pos, E$a) %>% magrittr::set_colnames(c("x", "y")),
              take_rows(pos, E$b) %>% magrittr::set_colnames(c("xend", "yend"))
            ) %>%
            mutate(
              x = x + (xend - x) * 0.03, xend = xend - (xend - x) * 0.1,
              y = y + (yend - y) * 0.03, yend = yend - (yend - y) * 0.1
            )
        })
    }) %>%
    with({
      list(
        slide1 = list(
          alpha = list(ex = 2e-2, tf = 0.20, lr = 3e-2, ia = 3e-2),
          title = "TF -> target"
        )
        , slide2 = list(
          alpha = list(ex = 2e-2, tf = 3e-2, lr = 0.20, ia = 3e-2),
          title = "Ligand -> receptor"
        )
        , slide3 = list(
          alpha = list(ex = 2e-2, tf = 3e-2, lr = 3e-2, ia = 0.20),
          title = "More protein-protein interaction"
        )
        , slide4 = list(
          alpha = list(ex = 0.60, tf = 3e-2, lr = 3e-2, ia = 3e-2),
          title = "Co-expression"
        )
      ) %>% lapply(
        function(slide) {
          ggplot() +
            # Co-expression segments
            geom_segment(
              data = ex$segment,
              aes(x = x, y = y, xend = xend, yend = yend),
              color = igraph::E(ex$graph)$color,
              alpha = slide$alpha$ex,
              size = 0.1,
              linetype = "solid"
            ) +
            # TF-target curves
            geom_curve(
              data = tf$segment,
              lineend = "butt", # linejoin = "mitre",
              aes(x = x, y = y, xend = xend, yend = yend),
              arrow = arrow(length = unit(0.005, "npc")),
              color = if_else(igraph::E(tf$graph)$sign > 0, "red", "blue"),
              alpha = slide$alpha$tf,
              size = 0.2,
              curvature = 0.1,
              linetype = "solid"
            ) +
            # Ligand-receptor curves
            geom_curve(
              data = lr$segment,
              lineend = "butt", # linejoin = "mitre",
              aes(x = x, y = y, xend = xend, yend = yend),
              arrow = arrow(length = unit(0.005, "npc")),
              color = if_else(igraph::E(lr$graph)$sign > 0, "red", "blue"),
              alpha = slide$alpha$lr,
              size = 0.2,
              curvature = 0.1,
              linetype = "solid"
            ) +
            # PPInteraction curves
            geom_curve(
              data = ia$segment,
              lineend = "butt", # linejoin = "mitre",
              aes(x = x, y = y, xend = xend, yend = yend),
              arrow = arrow(length = unit(0.005, "npc")),
              color = if_else(igraph::E(ia$graph)$sign > 0, "red", "blue"),
              alpha = slide$alpha$ia,
              size = 0.2,
              curvature = 0.1,
              linetype = "solid"
            ) +
            # Labels
            geom_text(
              data = ex$pos,
              aes(x = x, y = y, label = igraph::V(ex$graph)$name),
              size = 2,
              color = igraph::V(ex$graph)$color
            ) +
            theme_void() +
            ggplot2::ggtitle(slide$title) +
            theme(plot.title = element_text(hjust = 0.95, vjust = -3, size = 10))
        }
      )
    })

  # graph %>%
  #   igraph::plot.igraph(
  #     edge.width = 0.7,
  #     edge.color = igraph::E(.)$color,
  #
  #     vertex.size = 0,
  #     vertex.shape = 'none',
  #     #vertex.color = "black",
  #     vertex.label.cex = 0.4,
  #     vertex.label.color = vcolor[igraph::V(.)$name],
  #
  #     layout = graph_layout
  #   )
}
sce_dummy %>%
  `[`(, (.)$group == "healthy") %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(sce_dummy.de$table %>% filter(src == "edgeR"))

visNetwork
requireNamespace("visNetwork")
## Loading required namespace: visNetwork
omnipath.in %>%
  mutate(a = source_genesymbol, b = target_genesymbol) %>%
  filter(a %in% get_goi_genes(), b %in% get_goi_genes()) %>%
  select(a, b) %>%
  igraph::graph_from_data_frame(directed = FALSE) %>%
  visNetwork::visIgraph(layout = 'layout_with_kk')
Scenic
# scenic_options <-
#   SCENIC::initializeScenic(org = "mgi", dbDir = path_to("data/SCENIC/*cache"), dbs = SCENIC::defaultDbNames[["mgi"]], datasetTitle = "dummy", nCores = 1)
# sce_dummy@assays@data$counts %>%
#   list(counts = .) %>% with({
#     counts %>%
#       `[`(SCENIC::geneFiltering(., scenicOptions = scenic_options), ) %>%
#       #SCENIC::runCorrelation(scenic_options) %>%
#       log1p() %>%
#       SCENIC::runGenie3(scenic_options)
#   })
#
#
# `%dopar%` <- foreach::`%dopar%`
# foreach <- foreach::foreach
# scenic_options %>%
#   SCENIC::runSCENIC_1_coexNetwork2modules() %>%
#   SCENIC::runSCENIC_2_createRegulons() %>%
#   SCENIC::runSCENIC_3_scoreCells(log1p(sce_dummy@assays@data$counts))

Remove unused.

rm(cco_revelio)
gene.summary$hs <- NULL
gc()
##            used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells  9279820 495.6   15718238 839.5 15718238 839.5
## Vcells 24001056 183.2   42555924 324.7 35396594 270.1

RAM usage.

culprits() %>%
  head(10) %>%
  kable()
size cumsize unit
omnipath.tf 66.00 160.0 Mb
omnipath.in 41.00 92.0 Mb
gene.summary 39.00 50.0 Mb
omnipath.lr 4.10 12.0 Mb
show_coex_graph 1.50 7.7 Mb
sce_dummy 0.52 6.2 Mb
sce_dummy.go 0.48 5.7 Mb
goi_agg_by_group 0.45 5.2 Mb
show_de_table 0.27 4.7 Mb
heatmap 0.26 4.5 Mb

Single-cell datasets

>

Prepare

Loader list
# Collection of dataset loaders
load_dataset <- list()
Summarizer
# Summarizes each column in `cols`
# Call with results='asis'
tables <- function(sce, cols) {
  for (col in cols) {
    sce@colData %>%
      as.data.frame() %>%
      pull(col) %>%
      table(useNA = "ifany") %>%
      as.data.frame() %>%
      rename_with(~col, ".") %>%
      rename_with(~"#", Freq) %>%
      t() %>%
      kable() %>%
      print()

    # https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#generate-multiple-tables-from-a-for-loop
    # https://stackoverflow.com/questions/52631689/how-to-show-formatted-r-output-with-results-asis-in-rmarkdown
  }
}

Loaders

Allen Brain, 2020

About

“Mouse Whole Cortex and Hippocampus 10x” dataset from Allen Brain, 2020: [explore], [download], [protocols].

The dataset contains ~1M cells. We have selected ~31k cells of type “Astro”, “Endo” and “VLMC”.

Loader
load_dataset$ab <- function(max.size = 9999) {
  sce <-
    make_sce(
      counts = (
        path_to("*/AllenBrain/Mouse-WCH*/*fewer_cells/data.*.gz") %>%
          from_tsv() %>%
          `[`(, sort(colnames(.)))
      ),
      coldata = (
        path_to("*/AllenBrain/Mouse-WCH*/f*cells/meta.*.gz") %>%
          from_tsv() %>%
          `[`(sort(rownames(.)), )
      )
    ) %>%
    # Only keep VLMC
    `[`(, (.)$subclass_label == "VLMC") %>%
    # Select a random subset of cells for expediency
    `[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
    # Drop non-expressed cells
    `[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
    # Order by cell type: sometimes convenient below, esp. for plotting
    `[`(, order((.)$cell_type_alias_label))

  # conversion to matrix takes long, hence do it here
  sce@assays@data$counts %<>% as.matrix()

  # alias `celltype` column
  sce@colData$celltype <- sce@colData$cell_type_alias_label

  sce@colData$celltype %<>% as.factor() %>% relevel(ref = "375_VLMC")

  return(sce)
}

Betsholtz, 2018

About

GSE98816: single cell RNA-seq of mouse brain vascular transcriptomes.

Loader
load_dataset$bh <- function(max.size = 9999) {
  sce <-
    make_sce(
      counts = path_to("*/Betsholtz-2018/*/expr.*.gz") %>%
        from_tsv() %>%
        t(),
      coldata = path_to("*/Betsholtz-2018/*/meta.*.gz") %>% from_tsv()
    ) %>%
    # Only keep FB
    `[`(, (.)$celltype %in% c("FB1", "FB2")) %>%
    # Select a random subset of cells for expediency
    `[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
    # Drop non-expressed cells
    `[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
    # Order by cell type
    `[`(, order((.)$celltype))

  sce@assays@data$counts %<>% as.matrix()

  sce@colData$celltype %<>% as.factor() %>% relevel(ref = "FB1")

  return(sce)
}

Castelo-Branco, 2018

About

Raw data from GSE113973 (last update: Nov 13, 2019), metadata from UCSC cell browser.

Other links: web app, paper.

Loader
load_dataset$cb <- function(max.size = 9999) {
  sce <-
    make_sce(
      counts = path_to("*/CasteloBranco-2018/*/expr.*.gz") %>%
        from_tsv() %>%
        {
          # TODO: 1810011O10Rik -> Tcim
          # https://www.genecards.org/cgi-bin/carddisp.pl?gene=TCIM
          (.)
        } %>%
        t(),
      coldata = path_to("*/CasteloBranco-2018/*/meta.*.gz") %>%
        from_tsv()
    ) %>%
    # Only keep those cells
    `[`(, (.)$celltype %in% c("VLMC1", "VLMC2", "VLMC3", "VLMC4")) %>%
    # Select a random subset of cells for expediency
    `[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
    # Drop non-expressed cells
    `[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
    # Order by cell type
    `[`(, order((.)$celltype))

  sce@assays@data$counts %<>% as.matrix()

  sce@colData$Group %<>% as.factor() %>% relevel(ref = "Ctrl")
  sce@colData$celltype %<>% as.factor() %>% relevel(ref = "VLMC3")

  return(sce)
}

Daneman, 2021

About

Expression data from GSE135186 (last update: Feb 10, 2021). The cell-to-cluster assignment is a private communication by D. Aran (2021-05-20). The associated paper is Dorrier-Aran-…-Daneman, 2021.

Loader
load_dataset$dm <- function(max.size = 9999) {
  sce <-
    make_sce(
      counts = path_to("*/Daneman-2020/*/expr.*.gz") %>%
        from_tsv() %>%
        t(),
      coldata = path_to("*/Daneman-2020/*/meta.*.gz") %>%
        from_tsv()
    ) %>%
    # Select a random subset of cells for expediency
    `[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
    # Drop non-expressed cells (there shouldn't be any here)
    `[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
    # Order samples
    `[`(, order((.)$cluster))

  # Drop cells without cluster (they separate in dim. red. plots)
  sce %<>% `[`(, !is.na((.)$cluster))

  sce@assays@data$counts %<>% as.matrix()

  # Set factor reference level
  sce@colData$group %<>% as.factor() %>% relevel(ref = "healthy")
  sce@colData$cluster %<>% c() %>%
    as.factor() %>%
    relevel(ref = "0")

  # Alias for consistency
  sce@colData$celltype <- sce@colData$cluster

  return(sce)
}

Load all

Read from disk

datasets <- load_dataset %>% lapply(\(loader) loader(max.size = 777))
Attach reduced dimensions
datasets <- datasets %>% lapply(scater_attach)
# memo: `%<>%` doesn't seem to work with cache=TRUE

First look

Allen Brain

Cell type / Donor

Betsholtz

Cell type / Mouse ID

Castelo-Branco

Cell type / Group

Mouse model / Plate

Daneman

Subgroup / Cluster

Remove unused.

gc()
##            used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  9293144 496.4   15718238  839.5  15718238  839.5
## Vcells 79247077 604.7  199453428 1521.8 152788162 1165.7

RAM usage.

culprits() %>%
  head(10) %>%
  kable()
size cumsize unit
datasets 460.00 620.0 Mb
omnipath.tf 66.00 160.0 Mb
omnipath.in 41.00 92.0 Mb
gene.summary 39.00 50.0 Mb
omnipath.lr 4.10 12.0 Mb
show_coex_graph 1.50 7.8 Mb
sce_dummy 0.52 6.3 Mb
sce_dummy.go 0.48 5.8 Mb
goi_agg_by_group 0.45 5.3 Mb
show_de_table 0.27 4.8 Mb

Analysis of each

>

Dm clusters

The following is an approximation of Extended Data Fig 4c from Dorrier-Aran-…-Daneman, 2021.

We check whether other datasets (besides “Daneman”) cluster by their cell types using the same set of genes.

Daneman

The samples are presorted by “Daneman” cluster and by condition within each cluster.

datasets$dm %>%
  {
    # Remove the `celltype` columns (same as `cluster`)
    .@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
    # Order samples
    .[, order(paste(.$cluster, .$subgroup))]
  } %>%
  show_dm_heatmap(colors = colors, cluster_cols = FALSE)

Allen Brain

The samples are clustered.

datasets$ab %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

Betsholtz

The samples are clustered.

datasets$bh %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

Castelo-Branco

The samples are clustered.

datasets$cb %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

Cell cycle

Compute

cco <- list(
  ab = datasets$ab %>%
    cell_cycle(batch.by = "celltype"),
  bh = datasets$bh %>%
    cell_cycle(batch.by = "celltype"),
  cb = datasets$cb %>%
    cell_cycle(batch.by = "celltype"),
  dm = datasets$dm %>%
    `[`(, (.)$subgroup %in% c("healthy1", "healthy2")) %>%
    cell_cycle(batch.by = "subgroup")
)
## 2021-06-28 23:19:49: calculating optimal rotation: 2021-06-28 23:19:49: calculating PCA: 2021-06-28 23:19:49: assigning cell cycle phases: 2021-06-28 23:19:49: reading data: 0.33secs
## 1.8secs
## 3.25secs
## 18.91secs
## 2021-06-28 23:20:08: calculating optimal rotation: 2021-06-28 23:20:08: calculating PCA: 2021-06-28 23:20:08: assigning cell cycle phases: 2021-06-28 23:20:08: reading data: 0.14secs
## 1.88secs
## 2.17mins
## 2.49mins
## 2021-06-28 23:22:38: calculating optimal rotation: 2021-06-28 23:22:38: calculating PCA: 2021-06-28 23:22:38: assigning cell cycle phases: 2021-06-28 23:22:38: reading data: 0.22secs
## 1.7secs
## 2.46mins
## 2.64mins
## 2021-06-28 23:25:16: calculating optimal rotation: 2021-06-28 23:25:16: calculating PCA: 2021-06-28 23:25:16: assigning cell cycle phases: 2021-06-28 23:25:16: reading data: 1.58secs
## 4.78secs
## 6.24secs
## 15.43secs

Allen Brain

cco$ab %>% cell_cycle_plot2d(colors = colors)

Betsholtz

cco$bh %>% cell_cycle_plot2d(colors = colors)

Castelo-Branco

cco$cb %>% cell_cycle_plot2d(colors = colors)

Daneman (“healthy”)

cco$dm %>% cell_cycle_plot2d(colors = colors)

Gene blobs

Allen Brain

By celltype
datasets$ab %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

Betsholtz

By celltype
datasets$bh %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

Castelo-Branco

By condition
datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "Group", colors = colors)

By celltype
datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

Daneman

By condition
datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "subgroup", colors = colors)

By cluster
datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "cluster")

Geneset ratio

Prepare

pairs.to.contrast <-
  list(
    list(A = goi$col$name, B = goi$air$name),
    list(A = goi$glycolysis$name, B = goi$air$name)
  )
show_ab_density <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
  stopifnot(
    all(sce@colData[[group.by]] %in% names(colors))
  )

  sce %<>%
    scater::aggregateAcrossFeatures(
      ids = symbol2goiset(rownames(.)),
      average = TRUE,
      use.assay.type = "logcounts"
    )

  pairs.to.contrast %>%
    lapply(dual(function(A, B) {
      data.frame(
        a = sce@assays@data$logcounts[A, ],
        b = sce@assays@data$logcounts[B, ],
        g = sce@colData[[group.by]]
      ) %>% {
        (.) %>%
          ggplot(aes(x = (a - b), color = g)) +
          labs(x = paste0(A, " - ", B)) +
          labs(y = "Fraction of cells (i.e., density)") +
          labs(color = group.by) +
          scale_color_manual(values = colors, breaks = gglast(colour)) +
          ggtitle(paste0(A, " vs ", B)) +
          geom_density(size = 3)
      }
    }))
}
show_ab_2d <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
  stopifnot(
    all(sce@colData[[group.by]] %in% names(colors))
  )

  # Group -- for color
  g <- sce@colData[[group.by]]

  # Library size -- for size
  s <- sce@assays@data$counts %>% colSums()

  sce %<>%
    scater::aggregateAcrossFeatures(
      ids = symbol2goiset(rownames(.)),
      average = TRUE,
      use.assay.type = "logcounts"
    )

  pairs.to.contrast %>%
    lapply(dual(function(A, B) {
      data.frame(
        x = sce@assays@data$logcounts[A, ],
        y = sce@assays@data$logcounts[B, ],
        g = g,
        s = s
      ) %>% {
        (.) %>%
          ggplot(aes(x = x, y = y, color = g, size = s)) +
          labs(x = A, y = B, color = group.by, size = "Library size") +
          scale_color_manual(values = colors, breaks = gglast(colour)) +
          geom_point(size = 5, alpha = 0.6) +
          ggtitle(paste0(A, " vs ", B)) +
          guides(shape = FALSE) +
          geom_density_2d(size = 0.5) +
          guides(color = guide_legend(override.aes = list(alpha = 1)))
      }
    }))
}

Allen Brain

datasets$ab %>% show_ab_density(group.by = "celltype", colors = colors)

datasets$ab %>% show_ab_2d(group.by = "celltype", colors = colors)
## now dyn.load("/usr/lib/R/library/MASS/libs/MASS.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/isoband/libs/isoband.so") ...

Betsholtz

datasets$bh %>% show_ab_density(group.by = "celltype", colors = colors)

datasets$bh %>% show_ab_2d(group.by = "celltype", colors = colors)

Castelo-Branco

datasets$cb %>% show_ab_density(group.by = "celltype", colors = colors)

datasets$cb %>% show_ab_2d(group.by = "celltype", colors = colors)

Daneman: clusters

datasets$dm %>% show_ab_density(group.by = "cluster")

datasets$dm %>% show_ab_2d(group.by = "cluster")

Daneman: subgroup

datasets$dm %>% show_ab_density(group.by = "subgroup", colors = colors)

datasets$dm %>% show_ab_2d(group.by = "subgroup", colors = colors)

DE

Compute

de <- list(
  # Other/375_VLMC
  ab_other_vs_375 = datasets$ab %>% compute_de(~celltype),

  # FB2/FB1:
  bh_fb2_vs_fb1 = datasets$bh %>% compute_de(~celltype),

  # EAE/Ctrl (exclude the VLMC3 cells because they separate on PCA):
  cb_eae_vs_ctrl = datasets$cb %>% `[`(, (.)$celltype != "VLMC3") %>% compute_de(~Group),

  # Other/VLMC3:
  cb_other_vs_vlmc3 = datasets$cb %>% compute_de(~celltype),

  # EAE/Healthy:
  dm_eae_vs_hthy = datasets$dm %>% compute_de(~group)
)

AB: Other/375_VLMC

Most-DE genes
de$ab_other_vs_375$table %>% show_de_table()
Heatmap of most DE genes (edgeR)
datasets$ab %>%
  show_de_heatmap(
    de$ab_other_vs_375$table %>% filter(src == "edgeR"),
    colors = colors
  )

LogCMP vs LFC from edgeR
de$ab_other_vs_375$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Up in 375_VLMC <-- LFC --> Up in Other")

Genewise LFC comparison
de$ab_other_vs_375$table %>%
  show_de_cmp_lfc()

Volcano plots
de$ab_other_vs_375$table %>%
  show_de_volcano() +
  labs(x = "Up in 375_VLMC <-- LFC --> Up in Other", color = "Geneset")

Bh: FB2/FB1

Most-DE genes
de$bh_fb2_vs_fb1$table %>% show_de_table()
Heatmap of most DE genes (edgeR)
datasets$bh %>%
  show_de_heatmap(
    de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"),
    colors = colors
  )

LogCMP vs LFC from edgeR:
de$bh_fb2_vs_fb1$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Up in FB1 <-- LFC --> Up in FB2")

Genewise LFC comparison
de$bh_fb2_vs_fb1$table %>%
  show_de_cmp_lfc()

Volcano plots
de$bh_fb2_vs_fb1$table %>%
  show_de_volcano() +
  labs(x = "Up in FB1 <-- LFC --> Up in FB2", color = "Geneset")

CB: Other/VLMC3

Most-DE genes
de$cb_other_vs_vlmc3$table %>% show_de_table()
Heatmap of most DE genes (edgeR)
datasets$cb %>%
  show_de_heatmap(
    de$cb_other_vs_vlmc3$table %>% filter(src == "edgeR"),
    colors = colors
  )

LogCMP vs LFC from edgeR:
de$cb_other_vs_vlmc3$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Up in VLMC3 <-- LFC --> Up in Other")

Genewise LFC omparison
de$cb_other_vs_vlmc3$table %>%
  show_de_cmp_lfc()

Volcano plots
de$cb_other_vs_vlmc3$table %>%
  show_de_volcano() +
  labs(x = "Up in VLMC3 <-- LFC --> Up in Other", color = "Geneset")

CB (reduced): EAE/Ctrl

Most-DE genes
de$cb_eae_vs_ctrl$table %>% show_de_table()
Heatmap of most DE genes (edgeR)
datasets$cb %>%
  show_de_heatmap(
    de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"),
    colors = colors
  )

LogCMP vs LFC from edgeR:
de$cb_eae_vs_ctrl$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Up in Ctrl <-- LFC --> Up in EAE")

Genewise LFC comparison
de$cb_eae_vs_ctrl$table %>%
  show_de_cmp_lfc()

Volcano plots
de$cb_eae_vs_ctrl$table %>%
  show_de_volcano() +
  labs(x = "Up in Ctrl <-- LFC --> Up in EAE", color = "Geneset")

Dm: EAE/Healthy

Most-DE genes
de$dm_eae_vs_hthy$table %>% show_de_table()
Heatmap of most DE genes (edgeR)
datasets$dm %>%
  {
    # Remove the `celltype` columns (same as `cluster`)
    .@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
    # Order samples
    .[, order(.$subgroup)]
  } %>%
  show_de_heatmap(
    de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"),
    cluster_cols = FALSE,
    colors = colors
  )

LogCMP vs LFC from edgeR:
de$dm_eae_vs_hthy$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Healthy <-- LFC --> EAE")

Genewise LFC comparison
de$dm_eae_vs_hthy$table %>%
  show_de_cmp_lfc()

Volcano plots
de$dm_eae_vs_hthy$table %>%
  show_de_volcano() +
  labs(x = "Healthy <-- LFC --> EAE", color = "Geneset")

GO

Compute

go <-
  list(A = 33, B = 77, C = 144) %>% lapply(function(n) {
    de %>% lapply(function(de_result) {
      de_result$table %>% compute_go(n = n)
    })
  })

AB: Other/375_VLMC

go$A$ab_other_vs_375 %>% show_go_table()

Bh: FB2/FB1

go$A$bh_fb2_vs_fb1 %>% show_go_table()

CB: Other/VLMC3

go$A$cb_other_vs_vlmc3 %>% show_go_table()

CB (reduced): EAE/Ctrl

go$A$cb_eae_vs_ctrl %>% show_go_table()

Dm: EAE/Healthy

go$A$dm_eae_vs_hthy %>% show_go_table()

Co-expression

The straight edges are the top negative (blue) and positive (red) gene-gene Spearman correlations among the “genes of interest” within each sub-dataset. The nodes are colored by condition.

The arched edges are TF-target, ligand-receptor, protein-protein interactions from OmniPath.

Click on the figure to change the view; shift-click to enlarge.

Allen Brain

All
datasets$ab %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))

374_VLMC
datasets$ab %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "374_VLMC") %>%
  coex() %>%
  show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))

375_VLMC
datasets$ab %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "375_VLMC") %>%
  coex() %>%
  show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))

376_VLMC
datasets$ab %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "376_VLMC") %>%
  coex() %>%
  show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))

Betsholtz

All
datasets$bh %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))

FB1
datasets$bh %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "FB1") %>%
  coex() %>%
  show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))

FB2
datasets$bh %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "FB2") %>%
  coex() %>%
  show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))

Castelo-Branco

All
datasets$cb %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))

Ctrl
datasets$cb %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$Group == "Ctrl") %>%
  coex() %>%
  show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))

EAE
datasets$cb %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$Group == "EAE") %>%
  coex() %>%
  show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))

Daneman

All
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

Healthy1+2
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$subgroup %in% c("healthy1", "healthy2")) %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

Healthy3
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$subgroup == "healthy3") %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

EAE1
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$subgroup == "EAE1") %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

EAE2
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$subgroup == "EAE2") %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

Clean up.

gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   9337612  498.7   15718238  839.5  15718238  839.5
## Vcells 140759856 1074.0  239641512 1828.4 239641512 1828.4

RAM usage.

culprits() %>%
  head(10) %>%
  kable()
size cumsize unit
datasets 460.00 640.0 Mb
omnipath.tf 66.00 180.0 Mb
omnipath.in 41.00 120.0 Mb
gene.summary 39.00 74.0 Mb
de 17.00 36.0 Mb
go 5.60 19.0 Mb
omnipath.lr 4.10 13.0 Mb
show_coex_graph 1.50 9.0 Mb
sce_dummy 0.52 7.5 Mb
sce_dummy.go 0.48 7.0 Mb

Combined

>

Prepare

Combined DE table
de_tables <- list(
  a = de$ab_other_vs_375$table %>% mutate(name = "AB: Other/375_VLMC"),
  b = de$bh_fb2_vs_fb1$table %>% mutate(name = "Bh: FB2/FB1"),
  c1 = de$cb_other_vs_vlmc3$table %>% mutate(name = "CB: Other/VLMC3"),
  c2 = de$cb_eae_vs_ctrl$table %>% mutate(name = "CB': EAE/Ctrl"),
  d = de$dm_eae_vs_hthy$table %>% mutate(name = "Dm: EAE/Healthy")
)
de_tables %>%
  dual(rbind) %>%
  slice_sample(n = 7)
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ps/libs/ps.so") ...
## # A tibble: 7 x 7
##   symbol          base     lfc     p src    color     name              
## * <chr>          <dbl>   <dbl> <dbl> <chr>  <chr>     <chr>             
## 1 Rcor3         0.103  -0.217  0.999 DESeq2 #4A940CFF AB: Other/375_VLMC
## 2 Ascc1         6.25    0.0726 1     edgeR  #BF733AFF CB': EAE/Ctrl     
## 3 Sri           7.60   -0.599  1     edgeR  #698411FF CB': EAE/Ctrl     
## 4 C130079G13Rik 2.41    0      1     edgeR  #7E801BFF CB': EAE/Ctrl     
## 5 Sf3b1         5.10    0.875  0.931 DESeq2 #1824FBFF Bh: FB2/FB1       
## 6 Pa2g4         3.32   -0.768  1     DESeq2 #BD6A3BFF CB: Other/VLMC3   
## 7 Gpr135        0.0305  0.420  0.999 DESeq2 #61C247FF AB: Other/375_VLMC
Combined SCE dataset with reduced geneset
datasets$combo <-
  list(
    ab = data.frame(
      datasets$ab@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
      group = "AB",
      subgroup = datasets$ab$celltype
    ),
    bh = data.frame(
      datasets$bh@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
      group = "Bh",
      subgroup = datasets$bh$celltype
    ),
    cb = data.frame(
      datasets$cb@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
      group = "CB",
      subgroup = datasets$cb$Group
    ),
    dm = data.frame(
      datasets$dm@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
      group = "Dm",
      subgroup = datasets$dm$subgroup
    ) %>% {
      (.)[sample(rownames(.), size = min(nrow(.), 777)), ]
    }
  ) %>%
  lapply(function(ds) {
    ds[, Reduce(intersect, lapply(., colnames))]
  }) %>%
  dual(rbind) %>%
  {
    make_sce(
      (.) %>% select(-group, -subgroup) %>% t(),
      (.) %>% select(group, subgroup)
    )
  }
print(datasets$combo)
## class: SingleCellExperiment 
## dim: 301 1002 
## metadata(0):
## assays(1): counts
## rownames(301): Fos Fosb ... Tpm2 Actb
## rowData names(0):
## colnames(1002): ab.TGTGGTAGTACCGGCT-L8TX_180406_01_E06
##   ab.GACCAATTCCTGCCAT-L8TX_180926_01_E01 ... dm.EAE1_AAGTCTGAGTGACTCT
##   dm.healthy1_CAGGTGCGTTTGGCGC
## colData names(2): group subgroup
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Compare LFC

LFC x LFC

Compare the gene LFC (edgeR) across datasets. Each dot is a gene with x-coordinate as the LFC in one dataset and y-coordinate as the LFC in another.

de_tables %>%
  lapply(function(t) {
    t %>%
      group_by(name, src) %>%
      mutate(lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
      ungroup() %>%
      filter(src == "edgeR")
  }) %>%
  with({
    rbind(
      merge(a, b, by = "symbol"),
      merge(a, c1, by = "symbol"),
      merge(a, c2, by = "symbol"),
      merge(a, d, by = "symbol"),
      merge(b, c1, by = "symbol"),
      merge(b, c2, by = "symbol"),
      merge(b, d, by = "symbol"),
      merge(c1, d, by = "symbol"),
      merge(c2, d, by = "symbol")
    )
  }) %>%
  mutate(goi_set = symbol2goiset(symbol)) %>%
  {
    ggplot(., aes(x = lfc.x, y = lfc.y)) +

      # Background
      geom_point(alpha = 0.05, color = "black", size = 0.7) +
      geom_smooth(formula = y ~ x, method = "lm", size = 0.2, color = "gray") +

      # Foreground
      geom_point(
        data = filter(., goi_set != "Other"),
        aes(color = goi_set),
        stroke = 0, size = 1, alpha = 0.7
      ) +

      # geom_smooth(data = filter(., goi_set != "Other"), formula = y ~ x, method = 'lm', size = 0.2) +

      labs(x = "LFC", y = "LFC", color = "") +

      # scale_color_brewer(palette = "Set1") +

      facet_grid(
        cols = vars(name.x), rows = vars(name.y),
        scale = "free"
      ) +
      theme(
        legend.position = "bottom",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        strip.text = element_text(size = 10),
        axis.title = element_blank()
      ) +
      guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
  }

Top DE

The following table shows the top genes that are DE (one way or another according to edgeR) simultaneously in several datasets.

make_common_de_table <- function(de_tables, slicer = slice_max) {
  de_tables %>%
    lapply(function(t) {
      t %>%
        group_by(name, src) %>%
        mutate(norm.lfc = ((lfc - mean(lfc)) / sd(lfc))) %>%
        slicer(abs(norm.lfc), n = 777) %>%
        ungroup() %>%
        filter(src == "edgeR")
    }) %>%
    dual(rbind) %>%
    as.data.frame() %>%
    {
      # print(head(.))
      .
    } %>%
    select(symbol, norm.lfc, name) %>%
    stats::reshape(direction = "wide", idvar = "symbol", timevar = "name") %>%
    # Commonly-DE first
    arrange(desc(rowSums(!is.na(across(where(is.numeric)))))) %>%
    # Signature of NAs
    rowwise() %>%
    mutate(signature = paste(across(where(is.numeric), ~ ifelse(is.na(.x), "N", "Y")), collapse = "")) %>%
    ungroup() %>%
    # Role: TF, Rec, Lig, etc
    mutate(role = symbol2role(symbol)) %>%
    # Reset rownames
    as.data.frame(row.names = 1:nrow(.)) %>%
    # Formatting
    mutate(symbol = as.link(symbol)) %>%
    mutate(across(where(is.numeric), signif, 3)) %>%
    DT::datatable(escape = FALSE, rownames = TRUE)
}
de_tables %>% make_common_de_table(slicer = slice_max)

Agg. heatmaps

Prepare

Stack of grouped datasets in long form (reduced gene set)
datasets$grouped <-
  list(
    list(ds = datasets$ab, name = "AB", group.by = "celltype"),
    list(ds = datasets$bh, name = "Bh", group.by = "celltype"),
    list(ds = datasets$cb, name = "CB", group.by = "celltype"),
    list(ds = datasets$cb, name = "CB", group.by = "Group"),
    list(ds = datasets$dm, name = "Dm", group.by = "cluster"),
    list(ds = datasets$dm, name = "Dm", group.by = "subgroup")
  ) %>%
  lapply(dual(
    function(ds, name, group.by) {
      ds %>%
        # Subset genes
        take_rows(
          unique(goi %>% lapply(as.data.frame) %>% dual(rbind) %>% pull(genes))
        ) %>%
        scuttle::aggregateAcrossCells(ids = ds[[group.by]], use.assay.type = "logcounts", statistics = "mean") %>%
        SingleCellExperiment::logcounts() %>%
        as.data.frame() %>%
        ind2col("symbol") %>%
        reshape2::melt(id.vars = "symbol", variable.name = "subgroup", value.name = "expression") %>%
        mutate(dataset = name, subdataset = paste0(name, " (", group.by, ")"), group = group.by) %>%
        mutate(role = symbol2role(symbol))

      # TODO: add description?
    }
  )) %>%
  dual(rbind)
Symbols to highlight
interesting <- c(
  # Daneman clusters
  c("Rbp1", "Fn1", "Igfbp7"),

  # Aerobic
  c("Cox3b", "Cox7c", "Bloc1s1"),

  # Anaerobic glycolysis
  c("Ier3"),

  # Collagen
  c("Col4a1", "Col4a2", "Col8a1", "Col3a1"),

  # Fib. migration
  c("Sdc4")
)
Tiled heatmap plotter
show_stack <- function(ds) {
  ds$role

  ds %>%
    mutate(role = if_else(role == "", "Unknown", role)) %>%
    group_by(subdataset) %>%
    mutate(expression = 100 * expression / max(expression)) %>%
    ungroup() %>%
    mutate(
      symbol = if_else(symbol %in% interesting, glue("<span style='color:red;'>{symbol}</span>"), symbol)
    ) %>%
    mutate(color = c(ramp(0:7), colors)[as.character(subgroup)]) %>%
    mutate(subgroup = glue("<span name='{subgroup}' style='color:{color};'>{subgroup}</span>")) %>%
    # Append role to symbol
    # mutate(symbol = paste0(symbol, " (", role, ")")) %>%

    # Heatmaps
    ggplot(aes(x = subgroup, y = symbol, fill = expression)) +
    geom_tile() +
    scale_y_discrete(limits = rev) +
    scale_fill_gradient(low = "black", high = "yellow") +
    labs(fill = "%") +
    theme(
      axis.text.x = ggtext::element_markdown(
        size = 12, angle = 90, vjust = 0.5, hjust = 1
      ),
      axis.title = element_blank(),
      legend.title = element_text(size = 9),
      legend.text = element_text(size = 9),

      # Remove the `role` strip altogether
      # strip.text.y = element_blank(),

      strip.text.y = element_text(color = "black", angle = 0, size = 8),
      strip.text.x = element_text(color = "black", size = 10),
    ) +
    facet_grid(
      cols = vars(subdataset),
      rows = vars(role),
      space = "free",
      scales = "free",
      labeller = label_wrap_gen(5)
    ) +
    theme(axis.text.y = ggtext::element_markdown())
}

Daneman clusters

(0:7) %>%
  lapply(function(i) {
    datasets$grouped %>%
      filter(symbol %in% goi[[paste0("c", i)]]$genes) %>%
      show_stack() +
      ggtitle(paste0("Cluster #", i)) +
      theme(axis.text.y = ggtext::element_markdown(size = 12))
  })

Inflammation

datasets$grouped %>%
  filter(symbol %in% goi$neuroinflammation$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 12))

Col-

datasets$grouped %>%
  filter(symbol %in% goi$col$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 9))

Aerobic

datasets$grouped %>%
  filter(symbol %in% goi$air$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 7))

Anaerobic

datasets$grouped %>%
  filter(symbol %in% goi$glycolysis$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 6))

Fib. migration

datasets$grouped %>%
  filter(symbol %in% goi$fib_mig$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 10))

Geneset-PCA

For some of the gene sets, the following tabs show reduced dimension plots (PCA/TSNE/UMAP) once the combined dataset is restricted to that gene set.

Prepare

Plotting 2d dimension reduction
colors # required below
##         374_VLMC         375_VLMC         376_VLMC              FB1 
##    "aquamarine3"    "chartreuse4"    "chartreuse3" "cornflowerblue" 
##              FB2             Ctrl              EAE            VLMC1 
##           "blue"    "chartreuse4"         "coral2"    "chartreuse1" 
##            VLMC3            VLMC4          healthy             EAE1 
##         "coral3"         "coral4"          "green"         "coral1" 
##             EAE2         healthy1         healthy2         healthy3 
##         "coral3"     "aquamarine"    "chartreuse3"    "chartreuse4" 
##         positive         negative             G1.S                S 
##           "Red4"     "Steelblue1"        "#E16A86"        "#B88A00" 
##               G2             G2.M             M.G1 
##        "#50A315"        "#00AD9A"        "#009ADE"
common_map <- function(sce, dimred = NULL) {
  sce %<>% drop_empty()

  if (is.null(dimred)) {
    sce %<>% norm1() %>% scater_attach()
    return(list(
      PCA = common_map(sce, dimred = "PCA"),
      TSNE = common_map(sce, dimred = "TSNE"),
      UMAP = common_map(sce, dimred = "UMAP")
    ))
  }

  sce %>%
    {
      cbind(
        .@colData %>% as.data.frame(),
        list(
          TSNE = .@int_colData$reducedDims$TSNE %>%
            as.data.frame() %>%
            select(x = V1, y = V2),
          UMAP = .@int_colData$reducedDims$UMAP %>%
            as.data.frame() %>%
            select(x = V1, y = V2),
          PCA = .@int_colData$reducedDims$PCA %>%
            as.data.frame() %>%
            select(x = PC1, y = PC2)
        )[[dimred]]
      )
    } %>%
    group_by(group) %>%
    arrange(subgroup) %>%
    mutate(rk = as.factor(dense_rank(subgroup))) %>%
    ungroup() %>%
    mutate(color = colors[as.character(subgroup)]) %>%
    mutate(subgroup = paste0(group, ": ", subgroup)) %>%
    {
      ggplot((.), aes(x = x, y = y, color = subgroup)) +
        geom_point(data = ((.) %>% select(-group)), alpha = 0.05, size = 1) +
        geom_point(alpha = 0.5, size = 1.5) +
        facet_wrap(. ~ group) +
        scale_color_manual(values = (select(., color, subgroup) %>% pull(color, subgroup))) +
        ggplot2::ggtitle(dimred) +
        theme(
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          plot.title = element_text(),
        ) +
        guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
    }
}
Plotting 3d PCA
common_map_3d <- function(sce, color.by = "subgroup") {
  sce %<>%
    drop_empty() %>%
    norm1() %>%
    scater::logNormCounts() %>%
    scater::runPCA()

  data.frame(
    color_group = sce@colData[[color.by]],
    lib.size = sce@assays@data$counts %>% colSums(),
    SingleCellExperiment::reducedDim(sce, "PCA") %>%
      as.data.frame() %>%
      select(PC1, PC2, PC3)
  ) %>%
    group_by(color_group) %>%
    sample(size = 33, replace = TRUE) %>%
    ungroup() %>%
    # plotly::plot_ly(type = "scatter3d", mode = "markers",

    plotly::plot_ly() %>%
    plotly::add_markers(
      x = ~PC1, y = ~PC2, z = ~PC3,
      marker = list(size = 2, opacity = 1),
      color = ~color_group, colors = colors
    ) %>%
    plotly::layout(
      scene = list(
        xaxis = list(title = "PC1", showticklabels = FALSE),
        yaxis = list(title = "PC2", showticklabels = FALSE),
        zaxis = list(title = "PC3", showticklabels = FALSE)
      )
    )
}

All GoI

datasets$combo %>% common_map()

datasets$combo %>% common_map_3d()

Inflammation

geneset <- goi$neuroinflammation$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Col-

geneset <- goi$col$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Aerobic

geneset <- goi$air$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Anaerobic

geneset <- goi$glycolysis$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Fib. migration

geneset <- goi$fib_mig$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Kidney

For each dataset we compare the most-DE genes to those from the kidney fibroblast activation gene set SI7 (see above). The LFCs are first standardized and filtered to top absolute LFC.

Prepare

de_tables_vs_si7 <-
  de_tables %>%
  dual(rbind) %>%
  group_by(name, src) %>%
  mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
  filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
  ungroup() %>%
  merge(
    y = fb_genes_si7 %>%
      select(lfc = logFC) %>%
      ind2col("symbol") %>%
      mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
      filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
      rename(norm.lfc.ref = norm.lfc),
    by = "symbol",
    all = TRUE
  ) %>%
  mutate(delta = norm.lfc - norm.lfc.ref)

Plot Dataset vs Kidney

We only keep the LFC from edgeR.

de_tables_vs_si7 %>%
  tidyr::drop_na() %>%
  filter(src == "edgeR") %>%
  group_by(name, src) %>%
  mutate(is_out = FALSE) %>%
  mutate(
    is_out = is_out |
      symbol %in% (slice_min(., norm.lfc, n = 5) %>% pull(symbol)) |
      symbol %in% (slice_max(., norm.lfc, n = 5) %>% pull(symbol))
  ) %>%
  mutate(
    is_out = is_out |
      symbol %in% (slice_min(., norm.lfc.ref, n = 5) %>% pull(symbol)) |
      symbol %in% (slice_max(., norm.lfc.ref, n = 5) %>% pull(symbol))
  ) %>%
  mutate(label = if_else(is_out, symbol, "")) %>%
  ungroup() %>%
  ggplot(aes(x = norm.lfc, y = norm.lfc.ref, color = name)) +
  ggrepel::geom_text_repel(
    aes(label = label),
    hjust = 1.1, size = 3, show.legend = FALSE,
    max.overlaps = Inf, point.size = NA,
    segment.alpha = 0.2
  ) +
  geom_point(size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0, alpha = 0.2) +
  geom_vline(xintercept = 0, alpha = 0.2) +
  labs(x = "Standardized LFC in Dataset", y = "Standardized LFC in Reference", color = "Dataset") +
  scale_color_brewer(palette = "Set1") +
  theme(
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 9),
    legend.position = "bottom"
  ) +
  guides(color = guide_legend(override.aes = list(alpha = 1, size = 3)))

Table

The genes that have a very different LFC (between two conditions within a dataset) from the set SI7 (see data sources) are the one with the most positive or most negative “delta”.

de_tables_vs_si7 %>%
  tidyr::drop_na() %>%
  select(symbol, dataset = name, src, norm.lfc, norm.lfc.ref, delta) %>%
  mutate(symbol = as.link(symbol)) %>%
  DT::datatable(escape = FALSE)

(TODO)

# Genes DE in EAE but not in AB dataset

Clean up.

gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   9410844  502.6   15718238  839.5  15718238  839.5
## Vcells 145335838 1108.9  287649814 2194.6 287646791 2194.6

RAM usage.

culprits() %>%
  head(10) %>%
  kable()
size cumsize unit
datasets 460.0 670.0 Mb
omnipath.tf 66.0 200.0 Mb
omnipath.in 41.0 140.0 Mb
gene.summary 39.0 95.0 Mb
de_tables 19.0 57.0 Mb
de 17.0 38.0 Mb
go 5.6 21.0 Mb
omnipath.lr 4.1 15.0 Mb
show_coex_graph 1.5 11.0 Mb
de_tables_vs_si7 1.3 9.4 Mb

Conclusions